提交 005115a1 编写于 作者: L liuruilong

format files, improve accuracy

上级 93a1705a
...@@ -10,37 +10,37 @@ import UIKit ...@@ -10,37 +10,37 @@ import UIKit
@UIApplicationMain @UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate { class AppDelegate: UIResponder, UIApplicationDelegate {
var window: UIWindow? var window: UIWindow?
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
// Override point for customization after application launch. // Override point for customization after application launch.
return true return true
} }
func applicationWillResignActive(_ application: UIApplication) { func applicationWillResignActive(_ application: UIApplication) {
// Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state.
// Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game.
} }
func applicationDidEnterBackground(_ application: UIApplication) { func applicationDidEnterBackground(_ application: UIApplication) {
// Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later.
// If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits.
} }
func applicationWillEnterForeground(_ application: UIApplication) { func applicationWillEnterForeground(_ application: UIApplication) {
// Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background.
} }
func applicationDidBecomeActive(_ application: UIApplication) { func applicationDidBecomeActive(_ application: UIApplication) {
// Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface.
} }
func applicationWillTerminate(_ application: UIApplication) { func applicationWillTerminate(_ application: UIApplication) {
// Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:.
} }
} }
...@@ -16,51 +16,51 @@ import Foundation ...@@ -16,51 +16,51 @@ import Foundation
import paddle_mobile import paddle_mobile
public class MobileNet: Net{ public class MobileNet: Net{
class MobilenetPreProccess: CusomKernel { class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil) super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
}
} }
}
class PreWords {
class PreWords { var contents: [String] = []
var contents: [String] = [] init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) { if let filePath = inBundle.path(forResource: fileName, ofType: type) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) { let string = try! String.init(contentsOfFile: filePath)
let string = try! String.init(contentsOfFile: filePath) contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{ String($0[$0.index($0.startIndex, offsetBy: 10)...])
String($0[$0.index($0.startIndex, offsetBy: 10)...]) }
}else{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String {
return contents[index]
} }
}else{
fatalError("no file call \(fileName)")
}
} }
subscript(index: Int) -> String {
return contents[index] let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: [ResultHolder]) -> String {
let firstRes = res[0]
let resPointer = firstRes.result
var s: [String] = []
(0..<firstRes.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
} }
}
override public init(device: MTLDevice) {
let labels = PreWords.init(fileName: "synset") super.init(device: device)
except = 0
override public func resultStr(res: [ResultHolder]) -> String { modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
let firstRes = res[0] paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
let resPointer = firstRes.result preprocessKernel = MobilenetPreProccess.init(device: device)
var s: [String] = [] inputDim = Dim.init(inDim: [1, 224, 224, 3])
(0..<firstRes.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{ metalLoadMode = .LoadMetalInCustomMetalLib
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
} }
return s.joined(separator: "\n")
}
override public init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
preprocessKernel = MobilenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 224, 224, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
}
} }
...@@ -14,13 +14,13 @@ kernel void mobilenet_preprocess( ...@@ -14,13 +14,13 @@ kernel void mobilenet_preprocess(
texture2d<float, access::write> outTexture [[texture(1)]], texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]]) uint2 gid [[thread_position_in_grid]])
{ {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) { gid.y >= outTexture.get_height()) {
return; return;
} }
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
} }
kernel void mobilenet_preprocess_half( kernel void mobilenet_preprocess_half(
...@@ -28,11 +28,11 @@ kernel void mobilenet_preprocess_half( ...@@ -28,11 +28,11 @@ kernel void mobilenet_preprocess_half(
texture2d<half, access::write> outTexture [[texture(1)]], texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]]) uint2 gid [[thread_position_in_grid]])
{ {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) { gid.y >= outTexture.get_height()) {
return; return;
} }
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f); const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
} }
...@@ -10,84 +10,84 @@ import UIKit ...@@ -10,84 +10,84 @@ import UIKit
import paddle_mobile import paddle_mobile
class ViewController: UIViewController { class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView! @IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView! @IBOutlet weak var selectImageView: UIImageView!
@IBOutlet weak var elapsedTimeLabel: UILabel! @IBOutlet weak var elapsedTimeLabel: UILabel!
var net: MobileNet! var net: MobileNet!
var runner: Runner! var runner: Runner!
var toPredictTexture: MTLTexture? var toPredictTexture: MTLTexture?
override func viewDidLoad() {
super.viewDidLoad()
GlobalConfig.shared.computePrecision = .Float16
net = MobileNet.init(device: MetalHelper.shared.device)
runner = Runner.init(inNet: net, commandQueue: MetalHelper.shared.queue)
if let selectImage = UIImage.init(named: "banana.jpeg") { override func viewDidLoad() {
selectImageView.image = selectImage super.viewDidLoad()
runner.getTexture(image: selectImage.cgImage!) {[weak self] (texture) in GlobalConfig.shared.computePrecision = .Float16
self?.toPredictTexture = texture net = MobileNet.init(device: MetalHelper.shared.device)
} runner = Runner.init(inNet: net, commandQueue: MetalHelper.shared.queue)
if let selectImage = UIImage.init(named: "banana.jpeg") {
selectImageView.image = selectImage
runner.getTexture(image: selectImage.cgImage!) {[weak self] (texture) in
self?.toPredictTexture = texture
}
}
}
@IBAction func loadAct(_ sender: Any) {
if runner.load() {
let resutText = " load success ! "
print(resutText)
self.resultTextView.text = resutText
} else {
fatalError(" load error ")
}
}
@IBAction func selectImageAct(_ sender: Any) {
let imagePicker = UIImagePickerController()
imagePicker.sourceType = .camera
imagePicker.delegate = self
self.present(imagePicker, animated: true, completion: nil)
} }
} @IBAction func clearAct(_ sender: Any) {
runner.clear()
@IBAction func loadAct(_ sender: Any) {
if runner.load() {
let resutText = " load success ! "
print(resutText)
self.resultTextView.text = resutText
} else {
fatalError(" load error ")
} }
}
@IBAction func selectImageAct(_ sender: Any) {
let imagePicker = UIImagePickerController()
imagePicker.sourceType = .camera
imagePicker.delegate = self
self.present(imagePicker, animated: true, completion: nil)
}
@IBAction func clearAct(_ sender: Any) {
runner.clear()
}
@IBAction func predictAct(_ sender: Any) {
if let texture = toPredictTexture { @IBAction func predictAct(_ sender: Any) {
let beginDate = Date.init()
runner.predict(texture: texture) { [weak self] (success, resultHolder) in if let texture = toPredictTexture {
if success, let inResultHolder = resultHolder { let beginDate = Date.init()
let timeUse = Date.init().timeIntervalSince(beginDate) runner.predict(texture: texture) { [weak self] (success, resultHolder) in
DispatchQueue.main.async { if success, let inResultHolder = resultHolder {
self?.elapsedTimeLabel.text = "\(timeUse * 1000)ms" let timeUse = Date.init().timeIntervalSince(beginDate)
self?.resultTextView.text = self?.net.resultStr(res: inResultHolder) DispatchQueue.main.async {
} self?.elapsedTimeLabel.text = "\(timeUse * 1000)ms"
self?.resultTextView.text = self?.net.resultStr(res: inResultHolder)
}
} else {
print(" predict fail ")
}
}
} else { } else {
print(" predict fail ") print(" toPredictTexture is nil ")
} }
}
} else {
print(" toPredictTexture is nil ")
} }
}
} }
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate { extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
picker.dismiss(animated: true){[weak self] in picker.dismiss(animated: true){[weak self] in
guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else { guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else {
fatalError("no image") fatalError("no image")
} }
sSelf.selectImageView.image = image sSelf.selectImageView.image = image
sSelf.runner.getTexture(image: image.cgImage!, getTexture: { (texture) in sSelf.runner.getTexture(image: image.cgImage!, getTexture: { (texture) in
sSelf.toPredictTexture = texture sSelf.toPredictTexture = texture
}) })
}
} }
}
} }
...@@ -16,36 +16,36 @@ import UIKit ...@@ -16,36 +16,36 @@ import UIKit
@UIApplicationMain @UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate { class AppDelegate: UIResponder, UIApplicationDelegate {
var window: UIWindow? var window: UIWindow?
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool { func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
// Override point for customization after application launch. // Override point for customization after application launch.
return true return true
} }
func applicationWillResignActive(_ application: UIApplication) { func applicationWillResignActive(_ application: UIApplication) {
// Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state.
// Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game.
} }
func applicationDidEnterBackground(_ application: UIApplication) { func applicationDidEnterBackground(_ application: UIApplication) {
// Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later.
// If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits.
} }
func applicationWillEnterForeground(_ application: UIApplication) { func applicationWillEnterForeground(_ application: UIApplication) {
// Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background.
} }
func applicationDidBecomeActive(_ application: UIApplication) { func applicationDidBecomeActive(_ application: UIApplication) {
// Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface.
} }
func applicationWillTerminate(_ application: UIApplication) { func applicationWillTerminate(_ application: UIApplication) {
// Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:.
} }
} }
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="4MS-jc-i6A"> <document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait"> <device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/> <adaptation id="fullscreen"/>
</device> </device>
......
...@@ -18,14 +18,14 @@ import Foundation ...@@ -18,14 +18,14 @@ import Foundation
import paddle_mobile import paddle_mobile
@objc public class MetalHelper: NSObject { @objc public class MetalHelper: NSObject {
@objc let device: MTLDevice @objc let device: MTLDevice
@objc let queue: MTLCommandQueue @objc let queue: MTLCommandQueue
@objc let textureLoader: MTKTextureLoader @objc let textureLoader: MTKTextureLoader
@objc static let shared: MetalHelper = MetalHelper.init() @objc static let shared: MetalHelper = MetalHelper.init()
private override init(){ private override init(){
device = MTLCreateSystemDefaultDevice()! device = MTLCreateSystemDefaultDevice()!
queue = device.makeCommandQueue()! queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device) textureLoader = MTKTextureLoader.init(device: device)
super.init() super.init()
} }
} }
...@@ -16,51 +16,51 @@ import UIKit ...@@ -16,51 +16,51 @@ import UIKit
import paddle_mobile import paddle_mobile
class MultiPredictViewController: UIViewController { class MultiPredictViewController: UIViewController {
var runner1: Runner! var runner1: Runner!
var runner2: Runner! var runner2: Runner!
override func viewDidLoad() { override func viewDidLoad() {
super.viewDidLoad() super.viewDidLoad()
let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device) let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device)
let genet = Genet.init(device: MetalHelper.shared.device) let genet = Genet.init(device: MetalHelper.shared.device)
runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue) runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue)
let queue2 = MetalHelper.shared.device.makeCommandQueue() let queue2 = MetalHelper.shared.device.makeCommandQueue()
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue)
}
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue) @IBAction func predictAct(_ sender: Any) {
} let success = self.runner2.load()
// DispatchQueue.global().async {
@IBAction func predictAct(_ sender: Any) { let image1 = UIImage.init(named: "hand.jpg")
let success = self.runner2.load() // let success = self.runner2.load()
// DispatchQueue.global().async { // if success {
let image1 = UIImage.init(named: "hand.jpg") // for i in 0..<10000 {
// let success = self.runner2.load() // print(i)
// if success { // self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// for i in 0..<10000 { // print("result1: ")
// print(i) //// print(res)
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in // })
// print("result1: ") // }
//// print(res) // } else {
// }) // print("load failed")
// } // }
// } else { // self.runner1.clear()
// print("load failed") // }
// } // return
// self.runner1.clear() // DispatchQueue.global().async {
// } //// sleep(1)
// return // let image1 = UIImage.init(named: "banana.jpeg")
// DispatchQueue.global().async { //// if success {
//// sleep(1) // for _ in 0..<10 {
// let image1 = UIImage.init(named: "banana.jpeg") // self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
//// if success { // print("result2: ")
// for _ in 0..<10 { // print(res)
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in // })
// print("result2: ") // }
// print(res) //// } else {
// }) //// print("load failed")
// } //// }
//// } else { //// self.runner2.clear()
//// print("load failed") // }
//// } }
//// self.runner2.clear()
// }
}
} }
...@@ -20,30 +20,30 @@ ...@@ -20,30 +20,30 @@
#import <algorithm> #import <algorithm>
struct NMSParam { struct NMSParam {
float *score_data; float *score_data;
float *box_data; float *box_data;
float *output; float *output;
int output_size; int output_size;
std::vector<int> score_dim; std::vector<int> score_dim;
std::vector<int> box_dim; std::vector<int> box_dim;
float scoreThredshold; float scoreThredshold;
int nmsTopK; int nmsTopK;
int keepTopK; int keepTopK;
float nmsEta; float nmsEta;
float nmsThreshold; float nmsThreshold;
int background_label; int background_label;
}; };
...@@ -53,63 +53,63 @@ constexpr int kBBoxSize = 4; ...@@ -53,63 +53,63 @@ constexpr int kBBoxSize = 4;
template <class T> template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1, bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) { const std::pair<float, T>& pair2) {
return pair1.first > pair2.first; return pair1.first > pair2.first;
} }
template <class T> template <class T>
static inline void GetMaxScoreIndex( static inline void GetMaxScoreIndex(
const std::vector<T>& scores, const T threshold, int top_k, const std::vector<T>& scores, const T threshold, int top_k,
std::vector<std::pair<T, int>>* sorted_indices) { std::vector<std::pair<T, int>>* sorted_indices) {
for (size_t i = 0; i < scores.size(); ++i) { for (size_t i = 0; i < scores.size(); ++i) {
if (scores[i] > threshold) { if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i)); sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
} }
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
} }
template <class T> template <class T>
static inline T BBoxArea(const T* box, const bool normalized) { static inline T BBoxArea(const T* box, const bool normalized) {
if (box[2] < box[0] || box[3] < box[1]) { if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are is invalid // If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0. // (e.g. xmax < xmin or ymax < ymin), return 0.
return static_cast<T>(0.); return static_cast<T>(0.);
} else {
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else { } else {
// If coordinate values are not within range [0, 1]. const T w = box[2] - box[0];
return (w + 1) * (h + 1); const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
} }
}
} }
template <class T> template <class T>
static inline T JaccardOverlap(const T* box1, const T* box2, static inline T JaccardOverlap(const T* box1, const T* box2,
const bool normalized) { const bool normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) { box2[3] < box1[1]) {
return static_cast<T>(0.); return static_cast<T>(0.);
} else { } else {
const T inter_xmin = std::max(box1[0], box2[0]); const T inter_xmin = std::max(box1[0], box2[0]);
const T inter_ymin = std::max(box1[1], box2[1]); const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]); const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]); const T inter_ymax = std::min(box1[3], box2[3]);
const T inter_w = inter_xmax - inter_xmin; const T inter_w = inter_xmax - inter_xmin;
const T inter_h = inter_ymax - inter_ymin; const T inter_h = inter_ymax - inter_ymin;
const T inter_area = inter_w * inter_h; const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized); const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized); const T bbox2_area = BBoxArea<T>(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area); return inter_area / (bbox1_area + bbox2_area - inter_area);
} }
} }
template <typename T> template <typename T>
...@@ -120,40 +120,40 @@ static inline void NMSFast( ...@@ -120,40 +120,40 @@ static inline void NMSFast(
const T score_threshold, const T nms_threshold, const T score_threshold, const T nms_threshold,
const T eta, const int top_k, const T eta, const int top_k,
std::vector<int>* selected_indices) { std::vector<int>* selected_indices) {
// The total boxes for each instance. // The total boxes for each instance.
int num_boxes = bbox_dim[0]; int num_boxes = bbox_dim[0];
// 4: [xmin ymin xmax ymax] // 4: [xmin ymin xmax ymax]
int box_size = bbox_dim[1]; int box_size = bbox_dim[1];
std::vector<T> scores_data(num_boxes); std::vector<T> scores_data(num_boxes);
std::copy_n(score_data, num_boxes, scores_data.begin()); std::copy_n(score_data, num_boxes, scores_data.begin());
std::vector<std::pair<T, int>> sorted_indices; std::vector<std::pair<T, int>> sorted_indices;
GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices); GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);
selected_indices->clear(); selected_indices->clear();
T adaptive_threshold = nms_threshold; T adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) { while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second; const int idx = sorted_indices.front().second;
bool keep = true; bool keep = true;
for (size_t k = 0; k < selected_indices->size(); ++k) { for (size_t k = 0; k < selected_indices->size(); ++k) {
if (keep) { if (keep) {
const int kept_idx = (*selected_indices)[k]; const int kept_idx = (*selected_indices)[k];
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size, T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, true); bbox_data + kept_idx * box_size, true);
keep = overlap <= adaptive_threshold; keep = overlap <= adaptive_threshold;
} else { } else {
break; break;
} }
} }
if (keep) { if (keep) {
selected_indices->push_back(idx); selected_indices->push_back(idx);
} }
sorted_indices.erase(sorted_indices.begin()); sorted_indices.erase(sorted_indices.begin());
if (keep && eta < 1 && adaptive_threshold > 0.5) { if (keep && eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= eta; adaptive_threshold *= eta;
}
} }
}
} }
template <typename T> template <typename T>
...@@ -165,48 +165,48 @@ void MultiClassNMS(const T *boxes_data, ...@@ -165,48 +165,48 @@ void MultiClassNMS(const T *boxes_data,
const int& background_label, const int& nms_top_k, const int& background_label, const int& nms_top_k,
const int& keep_top_k, const T& nms_threshold, const int& keep_top_k, const T& nms_threshold,
const T& nms_eta, const T& score_threshold) { const T& nms_eta, const T& score_threshold) {
int64_t class_num = score_dim[0];
int64_t predict_dim = score_dim[1];
int num_det = 0;
for (int c = 0; c < class_num; ++c) {
if (c == background_label) continue;
const T *score_data = scores_data + c * predict_dim;
/// [c] is key int64_t class_num = score_dim[0];
NMSFast<T>(boxes_data, box_dim, score_data, score_threshold, nms_threshold, nms_eta, int64_t predict_dim = score_dim[1];
int num_det = 0;
for (int c = 0; c < class_num; ++c) {
if (c == background_label) continue;
const T *score_data = scores_data + c * predict_dim;
/// [c] is key
NMSFast<T>(boxes_data, box_dim, score_data, score_threshold, nms_threshold, nms_eta,
nms_top_k, &((*indices)[c])); nms_top_k, &((*indices)[c]));
num_det += (*indices)[c].size(); num_det += (*indices)[c].size();
}
*num_nmsed_out = num_det;
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<T, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *indices) {
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
// PADDLE_ENFORCE_LT(idx, predict_dim);
score_index_pairs.push_back(std::make_pair(sdata[idx], std::make_pair(label, idx)));
}
} }
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), *num_nmsed_out = num_det;
SortScorePairDescend<std::pair<int, int>>); if (keep_top_k > -1 && num_det > keep_top_k) {
score_index_pairs.resize(keep_top_k); std::vector<std::pair<T, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *indices) {
// Store the new indices. int label = it.first;
std::map<int, std::vector<int>> new_indices; const T* sdata = scores_data + label * predict_dim;
for (size_t j = 0; j < score_index_pairs.size(); ++j) { const std::vector<int>& label_indices = it.second;
int label = score_index_pairs[j].second.first; for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = score_index_pairs[j].second.second; int idx = label_indices[j];
new_indices[label].push_back(idx); // PADDLE_ENFORCE_LT(idx, predict_dim);
score_index_pairs.push_back(std::make_pair(sdata[idx], std::make_pair(label, idx)));
}
}
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*indices);
*num_nmsed_out = keep_top_k;
} }
new_indices.swap(*indices);
*num_nmsed_out = keep_top_k;
}
} }
template <typename T> template <typename T>
...@@ -215,69 +215,69 @@ void MultiClassOutput(const T *scores_data, ...@@ -215,69 +215,69 @@ void MultiClassOutput(const T *scores_data,
const T *bboxes_data, const T *bboxes_data,
T *outputs_data, T *outputs_data,
const std::map<int, std::vector<int>>& selected_indices) { const std::map<int, std::vector<int>>& selected_indices) {
int predict_dim = score_dim[1]; int predict_dim = score_dim[1];
int count = 0; int count = 0;
for (const auto& it : selected_indices) { for (const auto& it : selected_indices) {
/// one batch /// one batch
int label = it.first; int label = it.first;
const T* sdata = scores_data + label * predict_dim; const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& indices = it.second; const std::vector<int>& indices = it.second;
for (size_t j = 0; j < indices.size(); ++j) { for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j]; int idx = indices[j];
const T* bdata = bboxes_data + idx * kBBoxSize; const T* bdata = bboxes_data + idx * kBBoxSize;
outputs_data[count * kOutputDim] = label; // label outputs_data[count * kOutputDim] = label; // label
outputs_data[count * kOutputDim + 1] = sdata[idx]; // score outputs_data[count * kOutputDim + 1] = sdata[idx]; // score
// xmin, ymin, xmax, ymax // xmin, ymin, xmax, ymax
std::memcpy(outputs_data + count * kOutputDim + 2, bdata, 4 * sizeof(T)); std::memcpy(outputs_data + count * kOutputDim + 2, bdata, 4 * sizeof(T));
count++; count++;
}
} }
}
} }
/// Runs multi-class NMS over the score/box buffers described by `param` and
/// stores the flattened detections back into `param`.
///
/// Input requirements (checked with `assert`, so only in debug builds):
/// `score_dim` and `box_dim` must each have three extents and a leading
/// extent of 1.
///
/// Output:
///  - when at least one detection is kept, `param->output` points to
///    `num_kept * kOutputDim` floats filled by `MultiClassOutput`, and
///    `param->output_size` is that element count;
///  - when nothing is kept, `param->output` points to a single float holding
///    -1 and `param->output_size` is 1.
///
/// The buffer is allocated with `new[]` and is not released here.
void MultiClassNMSCompute(NMSParam *param) {
    // Validate the rank first so the extent checks below never index past
    // the end of a too-short dimension vector.
    assert(param->score_dim.size() == 3);
    assert(param->box_dim.size() == 3);
    assert(param->score_dim[0] == 1);
    assert(param->box_dim[0] == 1);

    auto background_label = param->background_label;
    auto nms_top_k = param->nmsTopK;
    auto keep_top_k = param->keepTopK;
    auto nms_threshold = param->nmsThreshold;
    auto nms_eta = param->nmsEta;
    auto score_threshold = param->scoreThredshold;

    // Drop the leading extent: the helpers below work on the remaining two.
    std::vector<int> score_dim_one_batch = {param->score_dim[1], param->score_dim[2]};
    std::vector<int> box_dim_one_batch = {param->box_dim[1], param->box_dim[2]};

    std::map<int, std::vector<int>> indices;
    int num_kept = 0;
    MultiClassNMS<float>(param->box_data, box_dim_one_batch, param->score_data,
                         score_dim_one_batch, &indices, &num_kept,
                         background_label, nms_top_k, keep_top_k, nms_threshold,
                         nms_eta, score_threshold);

    float *outputs = nullptr;
    int output_size = 0;
    if (num_kept == 0) {
        // Sentinel result: a single -1 signals "no detections".
        outputs = new float[1];
        outputs[0] = -1;
        output_size = 1;
    } else {
        outputs = new float[num_kept * kOutputDim];
        MultiClassOutput<float>(param->score_data, score_dim_one_batch,
                                param->box_data, outputs, indices);
        output_size = num_kept * kOutputDim;
    }

    param->output = outputs;
    param->output_size = output_size;
}
@implementation CPUResult @implementation CPUResult
...@@ -286,31 +286,31 @@ void MultiClassNMSCompute(NMSParam *param) { ...@@ -286,31 +286,31 @@ void MultiClassNMSCompute(NMSParam *param) {
@implementation NMSCompute

/// Packs this object's NMS settings and the given raw buffers into an
/// `NMSParam`, runs `MultiClassNMSCompute` on it, and wraps the resulting
/// output pointer and element count in a new `CPUResult`.
///
/// `score` and `bbox` are passed through unchanged as `score_data` and
/// `box_data`; `scoreDim` / `bboxDim` are converted element by element to
/// `int` extents.
- (CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox {
    NMSParam param;

    // Raw input buffers.
    param.box_data = bbox;
    param.score_data = score;

    // Scalar NMS settings mirrored from the receiver's properties.
    param.background_label = self.background_label;
    param.scoreThredshold = self.scoreThredshold;
    param.nmsTopK = self.nmsTopK;
    param.keepTopK = self.keepTopK;
    param.nmsEta = self.nmsEta;
    param.nmsThreshold = self.nmsThreshold;

    // Convert the NSNumber extents into plain int vectors.
    std::vector<int> scoreExtents;
    for (NSNumber *extent in self.scoreDim) {
        scoreExtents.push_back(extent.intValue);
    }
    param.score_dim = scoreExtents;

    std::vector<int> boxExtents;
    for (NSNumber *extent in self.bboxDim) {
        boxExtents.push_back(extent.intValue);
    }
    param.box_dim = boxExtents;

    MultiClassNMSCompute(&param);

    CPUResult *result = [[CPUResult alloc] init];
    result.output = param.output;
    result.outputSize = param.output_size;
    return result;
}

@end
......
...@@ -16,37 +16,37 @@ import Foundation ...@@ -16,37 +16,37 @@ import Foundation
import paddle_mobile import paddle_mobile
/// `Net` subclass configured for the Genet model: 128x128x3 input, a
/// `genet_preprocess` kernel, and Metal functions loaded from the bundled
/// `paddle-mobile-metallib.metallib`.
public class Genet: Net {
    /// Preprocessing kernel that runs the `genet_preprocess` Metal function
    /// (looked up in the default library) with a 128x128x3 output shape.
    class GenetPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let outputShape = Shape(inWidth: 128, inHeight: 128, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "genet_preprocess",
                       outputDim: outputShape,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Creates the net from the `genet_model` / `genet_params` resources in
    /// the main bundle.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        let bundle = Bundle.main
        modelPath = bundle.path(forResource: "genet_model", ofType: nil) ?! "model null"
        paramPath = bundle.path(forResource: "genet_params", ofType: nil) ?! "para null"
        preprocessKernel = GenetPreProccess(device: device)
        inputDim = Dim(inDim: [1, 128, 128, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = bundle.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Creates the net from in-memory model and parameter buffers, which are
    /// forwarded unchanged to `Net`.
    @objc override public init(device: MTLDevice, inParamPointer: UnsafeMutableRawPointer, inParamSize:Int, inModelPointer: UnsafeMutableRawPointer, inModelSize: Int) {
        super.init(device: device,
                   inParamPointer: inParamPointer,
                   inParamSize: inParamSize,
                   inModelPointer: inModelPointer,
                   inModelSize: inModelSize)
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        preprocessKernel = GenetPreProccess(device: device)
        inputDim = Dim(inDim: [1, 128, 128, 3])
    }

    /// Describes a result by printing only the first value of the first holder.
    override public func resultStr(res: [ResultHolder]) -> String {
        let firstValue = res[0].result[0]
        return " \(firstValue) ... "
    }
}
...@@ -16,53 +16,53 @@ import Foundation ...@@ -16,53 +16,53 @@ import Foundation
import paddle_mobile import paddle_mobile
/// `Net` subclass configured for the MobileNet classifier: 224x224x3 input,
/// a `mobilenet_preprocess` kernel, and label text loaded from the bundled
/// `synset.txt`.
public class MobileNet: Net {
    /// Preprocessing kernel that runs the `mobilenet_preprocess` Metal
    /// function (looked up in the default library) with a 224x224x3 output.
    class MobilenetPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let outputShape = Shape(inWidth: 224, inHeight: 224, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "mobilenet_preprocess",
                       outputDim: outputShape,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Line-indexed label table read from a bundled text file.
    ///
    /// Only lines longer than 10 characters are kept, and the first 10
    /// characters of each kept line are stripped.
    class PreWords {
        var contents: [String] = []

        /// Loads `fileName.type` from `inBundle`; traps if the resource is
        /// missing or cannot be read.
        init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
            guard let filePath = inBundle.path(forResource: fileName, ofType: type) else {
                fatalError("no file call \(fileName)")
            }
            let text = try! String(contentsOfFile: filePath)
            contents = text
                .components(separatedBy: CharacterSet.newlines)
                .filter { $0.count > 10 }
                .map { String($0.dropFirst(10)) }
        }

        /// Returns the label stored at `index`.
        subscript(index: Int) -> String {
            return contents[index]
        }
    }

    let labels = PreWords(fileName: "synset")

    /// Formats the five entries returned by `top(r: 5)` as
    /// "rank: label (score%)" lines joined with newlines.
    override public func resultStr(res: [ResultHolder]) -> String {
        let holder = res[0]
        let resPointer = holder.result
        let values = (0..<holder.capacity).map { resPointer[$0] }
        let lines = values.top(r: 5).enumerated().map { rank, entry in
            String(format: "%d: %@ (%3.2f%%)", rank + 1, labels[entry.0], entry.1 * 100)
        }
        return lines.joined(separator: "\n")
    }

    /// Creates the net from the `mobilenet_model` / `mobilenet_params`
    /// resources in the main bundle.
    override public init(device: MTLDevice) {
        super.init(device: device)
        except = 0
        let bundle = Bundle.main
        modelPath = bundle.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
        paramPath = bundle.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
        // Previously disabled alternative library lookup:
        // metalLoadMode = .LoadMetalInCustomMetalLib
        // metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
        preprocessKernel = MobilenetPreProccess(device: device)
        inputDim = Dim(inDim: [1, 224, 224, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = bundle.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }
}
...@@ -16,18 +16,18 @@ import Foundation ...@@ -16,18 +16,18 @@ import Foundation
import paddle_mobile import paddle_mobile
/// `Net` subclass configured for the combined MobileNet model files:
/// 224x224x3 input with Metal functions loaded from the bundled
/// `paddle-mobile-metallib.metallib`. No preprocessing kernel is set here.
public class MobileNetCombined: Net {
    /// Creates the net from the `combined_mobilenet_model` /
    /// `combined_mobilenet_params` resources in the main bundle.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        except = 0
        let bundle = Bundle.main
        modelPath = bundle.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
        paramPath = bundle.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
        inputDim = Dim(inDim: [1, 224, 224, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = bundle.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Describes a result by printing only the first value of the first holder.
    override public func resultStr(res: [ResultHolder]) -> String {
        let firstValue = res[0].result[0]
        return " \(firstValue) ... "
    }
}
...@@ -16,84 +16,84 @@ import Foundation ...@@ -16,84 +16,84 @@ import Foundation
import paddle_mobile import paddle_mobile
/// `Net` subclass configured for the SSD hand model: 300x300x3 input, a
/// `mobilenet_ssd_preprocess` kernel, and Metal functions loaded from the
/// bundled `paddle-mobile-metallib.metallib`.
public class MobileNet_ssd_hand: Net {
    /// Preprocessing kernel that runs the `mobilenet_ssd_preprocess` Metal
    /// function (looked up in the default library) with a 300x300x3 output.
    class MobilenetssdPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let outputShape = Shape(inWidth: 300, inHeight: 300, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "mobilenet_ssd_preprocess",
                       outputDim: outputShape,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Creates the net from the `ssd_hand_model` / `ssd_hand_params`
    /// resources in the main bundle.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        except = 2
        let bundle = Bundle.main
        modelPath = bundle.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null"
        paramPath = bundle.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null"
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = bundle.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        preprocessKernel = MobilenetssdPreProccess(device: device)
        inputDim = Dim(inDim: [1, 300, 300, 3])
    }

    /// Creates the net from in-memory model and parameter buffers; the file
    /// paths are set to empty strings in this mode.
    @objc override public init(device: MTLDevice,inParamPointer: UnsafeMutableRawPointer, inParamSize:Int, inModelPointer inModePointer: UnsafeMutableRawPointer, inModelSize: Int) {
        super.init(device: device,
                   inParamPointer: inParamPointer,
                   inParamSize: inParamSize,
                   inModelPointer: inModePointer,
                   inModelSize: inModelSize)
        except = 2
        modelPath = ""
        paramPath = ""
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        preprocessKernel = MobilenetssdPreProccess(device: device)
        inputDim = Dim(inDim: [1, 300, 300, 3])
    }

    /// Describes a result by interpolating the first holder itself.
    override public func resultStr(res: [ResultHolder]) -> String {
        let firstHolder = res[0]
        return " \(firstHolder)"
    }

    /// Not implemented: always traps. The disabled code below is the earlier
    /// CPU NMS post-processing, kept for reference.
    override public fetchResult_placeholder_never_emitted: Never { fatalError() }
}
...@@ -16,137 +16,137 @@ import Foundation ...@@ -16,137 +16,137 @@ import Foundation
import paddle_mobile import paddle_mobile
/// `Net` subclass configured for the AR SSD model: 160x160x3 input, a
/// `mobilent_ar_preprocess` kernel, and Metal functions loaded from the
/// bundled `paddle-mobile-metallib.metallib`.
public class MobileNet_ssd_AR: Net {
    /// Preprocessing kernel that runs the `mobilent_ar_preprocess` Metal
    /// function (looked up in the default library) with a 160x160x3 output.
    class MobilenetssdPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let outputShape = Shape(inWidth: 160, inHeight: 160, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "mobilent_ar_preprocess",
                       outputDim: outputShape,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Creates the net from the `ar_model` / `ar_params` resources in the
    /// main bundle.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        except = 2
        let bundle = Bundle.main
        modelPath = bundle.path(forResource: "ar_model", ofType: nil) ?! "model null"
        paramPath = bundle.path(forResource: "ar_params", ofType: nil) ?! "para null"
        preprocessKernel = MobilenetssdPreProccess(device: device)
        inputDim = Dim(inDim: [1, 160, 160, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = bundle.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Creates the net from in-memory model and parameter buffers, which are
    /// forwarded unchanged to `Net`.
    @objc override public init(device: MTLDevice, inParamPointer: UnsafeMutableRawPointer, inParamSize:Int, inModelPointer: UnsafeMutableRawPointer, inModelSize: Int) {
        super.init(device: device,
                   inParamPointer: inParamPointer,
                   inParamSize: inParamSize,
                   inModelPointer: inModelPointer,
                   inModelSize: inModelSize)
        except = 2
        preprocessKernel = MobilenetssdPreProccess(device: device)
        inputDim = Dim(inDim: [1, 160, 160, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Describes a result by printing only the first value of the first holder.
    override public func resultStr(res: [ResultHolder]) -> String {
        let firstValue = res[0].result[0]
        return " \(firstValue)"
    }

    /// Not implemented: always traps. The disabled code below is the earlier
    /// CPU NMS post-processing, kept for reference.
    override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] {
        fatalError()
        //        guard let interRes = paddleMobileRes.intermediateResults else {
        //            fatalError(" need have inter result ")
        //        }
        //
        //        guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as?  FetchHolder else {
        //            fatalError(" need score ")
        //        }
        //
        //        guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
        //            fatalError()
        //        }
        //
        //        let startDate = Date.init()
        //
        //        print("scoreFormatArr: ")
        //        print((0..<score.capacity).map{ score.result[$0] }.strideArray())
        //
        //        print("bbox arr: ")
        //
        //        print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
        //
        //        let nmsCompute = NMSCompute.init()
        //        nmsCompute.scoreThredshold = 0.25
        //        nmsCompute.nmsTopK = 100
        //        nmsCompute.keepTopK = 100
        //        nmsCompute.nmsEta = 1.0
        //        nmsCompute.nmsThreshold = 0.449999988
        //        nmsCompute.background_label = 0;
        //        nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
        //        nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
        //        guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
        //            fatalError( " result error " )
        //        }
        //        let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
        //        for i in 0..<Int(result.outputSize) {
        //
        //            print("i \(i) : \(result.output[i])")
        //        }
        //        print(Date.init().timeIntervalSince(startDate))
        //
        //        print(resultHolder.result![0])
        //        return resultHolder
    }

    // Disabled program rewrite, kept for reference:
    //
    //    override  func updateProgram(program: Program) {
    //
    //        for i in [56, 66, 76, 86, 93, 99] {
    //            let opDesc = program.programDesc.blocks[0].ops[i]
    //            let output = opDesc.outputs["Out"]!.first!
    //            let v = program.scope[output]!
    //            let originTexture = v as! Texture
    //            originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
    //
    //            originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
    //
    //            originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
    //
    //            program.scope[output] = originTexture
    //
    //            if i == 99 {
    //                opDesc.attrs["axis"] = 0
    //            } else {
    //                opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
    //            }
    //        }
    //
    //        for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
    //            let opDesc = program.programDesc.blocks[0].ops[i]
    //            let output = opDesc.outputs["Out"]!.first!
    //            let v = program.scope[output]!
    //
    //            let originTexture = v as! Texture
    //            originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
    //            opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
    //        }
    //
    //        for i in [60, 101, 90, 97, 70, 80] {
    //            let opDesc = program.programDesc.blocks[0].ops[i]
    //            let output = opDesc.outputs["Out"]!.first!
    //            let v = program.scope[output]!
    //            let originTexture = v as! Texture
    //            originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
    //            opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
    //        }
    //
    //        for i in [102] {
    //            let opDesc = program.programDesc.blocks[0].ops[i]
    //            for output in opDesc.outputs["Out"]! {
    //                let v = program.scope[output]!
    //                let originTexture = v as! Texture
    //                originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
    //            }
    //            opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
    //            print(" split axis \(opDesc.attrs["axis"])")
    //        }
    //        // 99
    //    }
}
...@@ -17,9 +17,9 @@ using namespace metal; ...@@ -17,9 +17,9 @@ using namespace metal;
kernel void mobilenet_preprocess( kernel void mobilenet_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]], texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]], texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]]) uint2 gid [[thread_position_in_grid]])
{ {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) { gid.y >= outTexture.get_height()) {
...@@ -31,9 +31,9 @@ kernel void mobilenet_preprocess( ...@@ -31,9 +31,9 @@ kernel void mobilenet_preprocess(
} }
kernel void mobilenet_preprocess_half( kernel void mobilenet_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]], texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]], texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]]) uint2 gid [[thread_position_in_grid]])
{ {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) { gid.y >= outTexture.get_height()) {
...@@ -45,9 +45,9 @@ kernel void mobilenet_preprocess_half( ...@@ -45,9 +45,9 @@ kernel void mobilenet_preprocess_half(
} }
kernel void mobilenet_ssd_preprocess( kernel void mobilenet_ssd_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]], texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]], texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]]) uint2 gid [[thread_position_in_grid]])
{ {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) { gid.y >= outTexture.get_height()) {
...@@ -59,9 +59,9 @@ kernel void mobilenet_ssd_preprocess( ...@@ -59,9 +59,9 @@ kernel void mobilenet_ssd_preprocess(
} }
kernel void mobilenet_ssd_preprocess_half( kernel void mobilenet_ssd_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]], texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]], texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]]) uint2 gid [[thread_position_in_grid]])
{ {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) { gid.y >= outTexture.get_height()) {
...@@ -74,44 +74,44 @@ kernel void mobilenet_ssd_preprocess_half( ...@@ -74,44 +74,44 @@ kernel void mobilenet_ssd_preprocess_half(
// Per-texel preprocessing for the "genet" model (float textures).
//
// For the texel at `gid`: scales the value read from `inTexture` by 255,
// subtracts 128 from the x/y/z components, multiplies by 0.017, and writes
// the result to `outTexture` with x and z swapped and w forced to 0.
// NOTE(review): the x/z swap presumably converts RGB to BGR — confirm
// against the model's expected channel order.
kernel void genet_preprocess(texture2d<float, access::read> inTexture [[texture(0)]],
                             texture2d<float, access::write> outTexture [[texture(1)]],
                             uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() && gid.y < outTexture.get_height();
    if (!insideOutput) {
        return;
    }

    const float4 channelMeans = float4(128.0f, 128.0f, 128.0f, 0.0f);
    const float4 normalized = (inTexture.read(gid) * 255.0 - channelMeans) * 0.017;
    outTexture.write(float4(normalized.z, normalized.y, normalized.x, 0.0f), gid);
}
// Half-precision variant of the "genet" preprocessing kernel.
//
// For the texel at `gid`: scales the value read from `inTexture` by 255,
// subtracts 128 from the x/y/z components, multiplies by 0.017, and writes
// the result to `outTexture` with x and z swapped and w forced to 0.
kernel void genet_preprocess_half(texture2d<half, access::read> inTexture [[texture(0)]],
                                  texture2d<half, access::write> outTexture [[texture(1)]],
                                  uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() && gid.y < outTexture.get_height();
    if (!insideOutput) {
        return;
    }

    const half4 channelMeans = half4(128.0f, 128.0f, 128.0f, 0.0f);
    const half4 normalized = (inTexture.read(gid) * 255.0 - channelMeans) * 0.017;
    outTexture.write(half4(normalized.z, normalized.y, normalized.x, 0.0f), gid);
}
// Per-texel preprocessing for the MobileNet AR model (float textures).
//
// For the texel at `gid`: scales the value read from `inTexture` by 255,
// subtracts 128 from the x/y/z components, multiplies by 0.017, and writes
// the result to `outTexture` with x and z swapped and w forced to 0.
// The kernel name (including its "mobilent" spelling) is looked up by
// string elsewhere, so it must stay exactly as is.
kernel void mobilent_ar_preprocess(texture2d<float, access::read> inTexture [[texture(0)]],
                                   texture2d<float, access::write> outTexture [[texture(1)]],
                                   uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() && gid.y < outTexture.get_height();
    if (!insideOutput) {
        return;
    }

    const float4 channelMeans = float4(128.0f, 128.0f, 128.0f, 0.0f);
    const float4 normalized = (inTexture.read(gid) * 255.0 - channelMeans) * 0.017;
    outTexture.write(float4(normalized.z, normalized.y, normalized.x, 0.0f), gid);
}
// Half-precision variant of the MobileNet AR preprocessing kernel.
//
// For the texel at `gid`: scales the value read from `inTexture` by 255,
// subtracts 128 from the x/y/z components, multiplies by 0.017, and writes
// the result to `outTexture` with x and z swapped and w forced to 0.
kernel void mobilent_ar_preprocess_half(texture2d<half, access::read> inTexture [[texture(0)]],
                                        texture2d<half, access::write> outTexture [[texture(1)]],
                                        uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() && gid.y < outTexture.get_height();
    if (!insideOutput) {
        return;
    }

    const half4 channelMeans = half4(128.0f, 128.0f, 128.0f, 0.0f);
    const half4 normalized = (inTexture.read(gid) * 255.0 - channelMeans) * 0.017;
    outTexture.write(half4(normalized.z, normalized.y, normalized.x, 0.0f), gid);
}
...@@ -17,18 +17,18 @@ import Foundation ...@@ -17,18 +17,18 @@ import Foundation
import paddle_mobile import paddle_mobile
/// `Net` configuration for the YOLO model shipped in the app bundle.
public class YoloNet: Net {

    /// Configures the net to load `yolo_model` / `yolo_params` from the main
    /// bundle, with a `[1, 416, 416, 3]` input and the custom metallib.
    ///
    /// - Parameter device: Metal device forwarded to `Net.init(device:)`.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)

        let bundle = Bundle.main
        except = 0
        // `?!` is a project-defined operator; presumably it unwraps or aborts
        // with the given message — confirm against its definition.
        modelPath = bundle.path(forResource: "yolo_model", ofType: nil) ?! "model null"
        paramPath = bundle.path(forResource: "yolo_params", ofType: nil) ?! "para null"
        inputDim = Dim(inDim: [1, 416, 416, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = bundle.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Returns a short preview string built from the first value of the first result.
    ///
    /// - Parameter res: Result holders produced by a prediction; must not be empty.
    override public func resultStr(res: [ResultHolder]) -> String {
        let firstHolder = res[0]
        let firstValue = firstHolder.result[0]
        return " \(firstValue) ... "
    }
}
...@@ -34,83 +34,83 @@ ...@@ -34,83 +34,83 @@
@implementation LoadPointerViewController

/// Reads the whole file at `path` into a newly malloc'd buffer.
///
/// @param path    File system path; nil or empty is treated as a failure.
/// @param outSize Receives the number of bytes read (0 on failure).
/// @return The buffer (caller owns it and must `free` it), or NULL when the
///         file cannot be opened, is empty, is larger than INT_MAX bytes
///         (the sizes are later narrowed to `int`), or cannot be read in full.
static void *LoadFileContents(NSString *path, long *outSize) {
    if (outSize) {
        *outSize = 0;
    }
    if (path.length == 0) {
        return NULL;
    }

    FILE *fp = fopen([path UTF8String], "rb");
    if (!fp) {
        return NULL;
    }

    void *buffer = NULL;
    long size = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        size = ftell(fp);
    }
    if (size > 0 && size <= INT_MAX) {
        rewind(fp);
        buffer = malloc((size_t)size);
        // A short read means the buffer would be partially uninitialised.
        if (buffer && fread(buffer, 1, (size_t)size, fp) != (size_t)size) {
            free(buffer);
            buffer = NULL;
        }
    }
    fclose(fp);

    if (buffer && outSize) {
        *outSize = size;
    }
    return buffer;
}

/// Shows the sample image and loads the bundled model / parameter files into
/// memory, storing the raw pointers and sizes in `_modelConfig`.
- (void)viewDidLoad {
    [super viewDidLoad];

    self.imageView.image = [UIImage imageNamed:@"banana.jpeg"];

    NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"super_model" withExtension:nil].path;
    NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"super_params" withExtension:nil].path;

    long fileSize = 0;
    void *buffer = LoadFileContents(modelPath, &fileSize);
    long paramfileSize = 0;
    void *parmaBuffer = LoadFileContents(paramPath, &paramfileSize);

    if (!buffer || !parmaBuffer) {
        // Leave _modelConfig nil so the load button can refuse to proceed.
        NSLog(@" failed to read model or param file");
        free(buffer);
        free(parmaBuffer);
        return;
    }

    // The buffers are intentionally not freed here: the config hands the raw
    // pointers to the net. NOTE(review): presumably the net takes ownership —
    // confirm who releases them.
    _modelConfig = [[ModelConfig alloc] init];
    _modelConfig.modelPointer = buffer;
    _modelConfig.modelSize = (int)fileSize;
    _modelConfig.paramPointer = parmaBuffer;
    _modelConfig.paramSize = (int)paramfileSize;
}

/// Creates the GPU wrapper for the super-resolution net and loads it.
- (IBAction)loaderButtonPressed:(id)sender {
    if (!_modelConfig) {
        NSLog(@" model config is nil");
        return;
    }
    self.paddleMobile = [[PaddleMobileGPU alloc] initWithCommandQueue:MetalHelper.shared.queue net:SuperResolutionNetType modelConfig:_modelConfig];
    _loaded = [self.paddleMobile load];
    NSLog(@" load 结果: %@", _loaded ? @"成功" : @"失败");
}

- (IBAction)predictButtonPressed:(id)sender {
    [self predict];
}

/// Converts the displayed image to a Metal texture, runs `max` predictions
/// and logs the average time per prediction once the last one succeeds.
- (void)predict {
    UIImage *image = self.imageView.image;
    if (!image) {
        NSLog(@" image is nil");
        return;
    }

    id<MTLTexture> texture = [MetalHelper.shared.textureLoader newTextureWithCGImage:image.CGImage options:nil error:nil];
    _texture = texture;
    if (!_texture) {
        NSLog(@" texture is nil");
        return;
    }

    if (!self.loaded) {
        NSLog(@" not load ");
        return;
    }

    NSTimeInterval startTime = [[NSDate date] timeIntervalSince1970];
    NSInteger max = 1;
    for (NSInteger i = 0; i < max; i++) {
        // The block's result type matches -predict:withCompletion:, which
        // delivers one inner array per network output.
        [self.paddleMobile predict:_texture withCompletion:^(BOOL success, NSArray<NSArray<NSNumber *> *> *result) {
            if (success) {
                if (i == max - 1) {
                    double time = [[NSDate date] timeIntervalSince1970] - startTime;
                    time = (time / max) * 1000;
                    NSLog(@"gap ==== %fms", time);
                }
            }
        }];
    }
}

/// Clears the loaded net and marks the controller as not loaded.
- (IBAction)clear:(id)sender {
    [self.paddleMobile clear];
    self.loaded = NO;
}

@end
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
/// Network kinds accepted by -[PaddleMobileGPU initWithCommandQueue:net:modelConfig:].
typedef enum : NSUInteger {
    /// Selects the `SuperResolutionNet` implementation.
    SuperResolutionNetType = 0,
    /// Selects the `MobileNet_ssd_AR` implementation.
    MobileNetSSDType = 1
} NetType;
@interface PaddleMobileGPUResult: NSObject @interface PaddleMobileGPUResult: NSObject
......
...@@ -30,75 +30,75 @@ ...@@ -30,75 +30,75 @@
@implementation PaddleMobileGPUResult

/// Retains `resultHolder` and mirrors its `result` pointer and `capacity`
/// into this object's `output` and `outputSize`.
- (void)setOutputResult:(ResultHolder *)resultHolder {
    [self setResultHolder:resultHolder];
    [self setOutput:[resultHolder result]];
    [self setOutputSize:[resultHolder capacity]];
}

/// Forwards to the holder's `releasePointer`; `output` should not be used afterwards.
- (void)releaseOutput {
    ResultHolder *holder = [self resultHolder];
    [holder releasePointer];
}

@end
@interface PaddleMobileGPU ()
{
    // Runner that wraps the selected net; every public method forwards to it.
    Runner *runner;
}
@end

@implementation PaddleMobileGPU

/// Builds the net selected by `netType` from the in-memory model / parameter
/// buffers in `config` and wraps it in a Runner bound to `queue`.
///
/// @param queue   Command queue used for prediction; its device creates the net.
/// @param netType Which network implementation to instantiate.
/// @param config  Pointers and sizes of the model and parameter data.
- (instancetype)initWithCommandQueue:(id<MTLCommandQueue>)queue net:(NetType)netType modelConfig:(ModelConfig *)config {
    self = [super init];
    if (self) {
        Net *net = nil;
        if (netType == SuperResolutionNetType) {
            net = [[SuperResolutionNet alloc] initWithDevice:queue.device inParamPointer:config.paramPointer inParamSize:config.paramSize inModelPointer:config.modelPointer inModelSize:config.modelSize];
        } else if (netType == MobileNetSSDType) {
            net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device inParamPointer:config.paramPointer inParamSize:config.paramSize inModelPointer:config.modelPointer inModelSize:config.modelSize];
        }
        // NOTE(review): an unrecognised netType leaves `net` nil here —
        // confirm that Runner tolerates a nil net.
        runner = [[Runner alloc] initInNet:net commandQueue:queue];
    }
    return self;
}

/// Loads the net through the runner; returns the runner's success flag.
- (BOOL)load {
    return [runner load];
}

/// Runs a prediction and delivers every output as an array of boxed floats.
///
/// Each ResultHolder's values are copied into an NSArray and the holder's
/// pointer is released before `completion` is called, so callers never see
/// the raw buffers.
///
/// @param texture    Input texture.
/// @param completion Called with the success flag and one inner array per
///                   network output. May be nil.
- (void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSArray <NSNumber *>*> *))completion {
    [runner predictWithTexture:texture completion:^(BOOL success, NSArray<ResultHolder *> * _Nullable resultArr) {
        NSMutableArray<NSMutableArray <NSNumber *>*> *ocResultArray = [NSMutableArray arrayWithCapacity:resultArr.count];
        for (ResultHolder *resultHolder in resultArr) {
            NSInteger capacity = resultHolder.capacity;
            NSMutableArray <NSNumber *>*res = [NSMutableArray arrayWithCapacity:(NSUInteger)(capacity > 0 ? capacity : 0)];
            for (NSInteger j = 0; j < capacity; ++j) {
                // Index with the element counter `j`; the previous code used
                // the holder index and copied one value repeatedly.
                [res addObject:[NSNumber numberWithFloat:resultHolder.result[j]]];
            }
            [ocResultArray addObject:res];
            [resultHolder releasePointer];
        }
        if (completion) {
            completion(success, ocResultArray);
        }
    }];
}

/// Runs a prediction and delivers the outputs as PaddleMobileGPUResult
/// objects that still reference the raw result buffers.
///
/// The receiver of `completion` is responsible for calling -releaseOutput on
/// each result.
///
/// @param texture    Input texture.
/// @param completion Called with the success flag and one result per network
///                   output. May be nil, in which case the holders are
///                   released immediately.
- (void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, NSArray <PaddleMobileGPUResult *> *))completion {
    [runner predictWithTexture:texture completion:^(BOOL success, NSArray<ResultHolder *> * _Nullable resultArr) {
        if (!completion) {
            // Nobody can call -releaseOutput later, so release right away.
            for (ResultHolder *holder in resultArr) {
                [holder releasePointer];
            }
            return;
        }

        NSMutableArray <PaddleMobileGPUResult *> *ocResultArr = [NSMutableArray arrayWithCapacity:resultArr.count];
        for (ResultHolder *result in resultArr) {
            PaddleMobileGPUResult *gpuResult = [[PaddleMobileGPUResult alloc] init];
            gpuResult.dim = result.dim;
            [gpuResult setOutputResult:result];
            [ocResultArr addObject:gpuResult];
        }
        completion(success, ocResultArr);
    }];
}

/// Forwards to the runner's `clear`.
- (void)clear {
    [runner clear];
}

@end
...@@ -16,57 +16,57 @@ import Foundation ...@@ -16,57 +16,57 @@ import Foundation
import paddle_mobile import paddle_mobile
/// `Net` subclass describing the super-resolution model.
///
/// It can be configured either from in-memory model / parameter buffers or
/// from the `super_model` / `super_params` resources in the main bundle.
@objc public class SuperResolutionNet: Net {

    /// Configures the net from model and parameter data already in memory.
    ///
    /// NOTE(review): this initializer uses a 3-channel input and leaves
    /// `preprocessKernel` untouched, while `init(device:)` uses 1 channel and
    /// clears `preprocessKernel` — confirm the difference is intended.
    ///
    /// - Parameters:
    ///   - device: Metal device forwarded to `Net.init(device:)`.
    ///   - inParamPointer: Start of the parameter data.
    ///   - inParamSize: Size of the parameter data.
    ///   - inModelPointer: Start of the model data.
    ///   - inModelSize: Size of the model data.
    public override init(device: MTLDevice, inParamPointer: UnsafeMutableRawPointer, inParamSize: Int, inModelPointer: UnsafeMutableRawPointer, inModelSize: Int) {
        super.init(device: device)
        except = 0
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        inputDim = Dim(inDim: [1, 224, 224, 3])
        paramPointer = inParamPointer
        paramSize = inParamSize
        modelPointer = inModelPointer
        modelSize = inModelSize
    }

    /// Configures the net to load `super_model` / `super_params` from the main
    /// bundle, with no preprocess kernel and a `[1, 224, 224, 1]` input.
    ///
    /// - Parameter device: Metal device forwarded to `Net.init(device:)`.
    @objc override public init(device: MTLDevice) {
        super.init(device: device)
        let bundle = Bundle.main
        except = 0
        modelPath = bundle.path(forResource: "super_model", ofType: nil) ?! "model null"
        paramPath = bundle.path(forResource: "super_params", ofType: nil) ?! "para null"
        preprocessKernel = nil
        inputDim = Dim(inDim: [1, 224, 224, 1])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = bundle.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Result formatting is not implemented for this net; returns a fixed placeholder.
    override public func resultStr(res: [ResultHolder]) -> String {
        return "未实现"
    }

    /// Resizes every non-persistable `LodTensor` texture in the program to the
    /// current `inputDim`, and re-initialises the fetch output to match.
    ///
    /// The output holder is rewritten for each matching texture, so after the
    /// loop it reflects the last texture visited.
    ///
    /// - Parameter program: Program whose scope variables are updated in place.
    override public func updateProgram(program: Program) {
        // n h w c
        for block in program.programDesc.blocks {
            for varDesc in block.vars where !varDesc.persistable && varDesc.type == .LodTensor {
                guard let texture = program.scope.vars[varDesc.name] as? Texture else {
                    continue
                }

                // Both dims are derived from the texture's current state
                // before updateDims changes it.
                let newDim = Dim(inDim: [texture.dim[0], inputDim[1], inputDim[2], texture.tensorDim[1]])
                print(" var desc name " + varDesc.name + " new dim" + "\(newDim)")

                let newTensorDim = Dim(inDim: [texture.tensorDim[0], texture.tensorDim[1], inputDim[1], inputDim[2]])
                texture.updateDims(inTensorDim: newTensorDim, inDim: newDim)
                texture.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: GlobalConfig.shared.computePrecision)

                let fetchHolder = program.scope.output() as! FetchHolder
                fetchHolder.dim = newDim
                fetchHolder.capacity = newDim.numel()
                fetchHolder.paddedCapacity = newDim.numel() * 4
                fetchHolder.initBuffer(device: device)
            }
        }
    }
}
...@@ -4,28 +4,28 @@ import Foundation ...@@ -4,28 +4,28 @@ import Foundation
import QuartzCore import QuartzCore
/// Maintains a smoothed frames-per-second estimate from `frameCompleted()` calls.
public class FPSCounter {
    /// Latest smoothed estimate; 0 until the first measurement is taken.
    private(set) public var fps: Double = 0

    /// Frames counted since the current measurement window began.
    var frames = 0
    /// `CACurrentMediaTime()` value at which the current window began.
    var startTime: CFTimeInterval = 0

    /// Begins a new measurement window at the current time.
    public func start() {
        frames = 0
        startTime = CACurrentMediaTime()
    }

    /// Records one finished frame and refreshes `fps`.
    ///
    /// Nothing is updated during the first 0.1 s of a window. After that,
    /// the window's average rate is blended into `fps` (75% old value, 25%
    /// new), and once the window exceeds 1 s a new one is started.
    public func frameCompleted() {
        frames += 1

        let elapsed = CACurrentMediaTime() - startTime
        guard elapsed > 0.1 else {
            return
        }

        let smoothing = 0.75
        let windowRate = Double(frames) / elapsed
        fps = smoothing*fps + (1 - smoothing)*windowRate

        guard elapsed > 1 else {
            return
        }
        frames = 0
        startTime = CACurrentMediaTime()
    }
}
...@@ -6,15 +6,15 @@ import AVFoundation ...@@ -6,15 +6,15 @@ import AVFoundation
/// Optional callbacks through which `VideoCapture` reports captured frames and photos.
@available(iOS 10.0, *)
@objc public protocol VideoCaptureDelegate: NSObjectProtocol {

    /// Delivers a captured video frame as its raw sample buffer.
    @objc optional func videoCapture(
        _ capture: VideoCapture,
        didCaptureSampleBuffer sampleBuffer: CMSampleBuffer,
        timestamp: CMTime)

    /// Delivers a captured video frame as a Metal texture; `texture` may be nil.
    @objc optional func videoCapture(
        _ capture: VideoCapture,
        didCaptureVideoTexture texture: MTLTexture?,
        timestamp: CMTime)

    /// Delivers a captured photo as a `UIImage`; `previewImage` may be nil.
    @objc optional func videoCapture(
        _ capture: VideoCapture,
        didCapturePhoto previewImage: UIImage?)

    /// Delivers a captured photo as a Metal texture; `texture` may be nil.
    @objc optional func videoCapture(
        _ capture: VideoCapture,
        didCapturePhotoTexture texture: MTLTexture?)
}
/** /**
Simple interface to the iPhone's camera. Simple interface to the iPhone's camera.
*/ */
@available(iOS 10.0, *) @available(iOS 10.0, *)
public class VideoCapture: NSObject { public class VideoCapture: NSObject {
public var previewLayer: AVCaptureVideoPreviewLayer? public var previewLayer: AVCaptureVideoPreviewLayer?
...@@ -35,9 +35,9 @@ public class VideoCapture: NSObject { ...@@ -35,9 +35,9 @@ public class VideoCapture: NSObject {
self.cameraPosition = position self.cameraPosition = position
super.init() super.init()
} }
public func setUp(sessionPreset: AVCaptureSession.Preset = .medium, public func setUp(sessionPreset: AVCaptureSession.Preset = .medium,
completion: @escaping (Bool) -> Void) { completion: @escaping (Bool) -> Void) {
queue.async { queue.async {
let success = self.setUpCamera(sessionPreset: sessionPreset) let success = self.setUpCamera(sessionPreset: sessionPreset)
DispatchQueue.main.async { DispatchQueue.main.async {
...@@ -45,7 +45,7 @@ public class VideoCapture: NSObject { ...@@ -45,7 +45,7 @@ public class VideoCapture: NSObject {
} }
} }
} }
func fontCamera() -> AVCaptureDevice? { func fontCamera() -> AVCaptureDevice? {
let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices
return deveices.first return deveices.first
...@@ -62,7 +62,7 @@ public class VideoCapture: NSObject { ...@@ -62,7 +62,7 @@ public class VideoCapture: NSObject {
captureSession.beginConfiguration() captureSession.beginConfiguration()
captureSession.sessionPreset = sessionPreset captureSession.sessionPreset = sessionPreset
var oCaptureDevice: AVCaptureDevice? var oCaptureDevice: AVCaptureDevice?
switch cameraPosition { switch cameraPosition {
case .back: case .back:
...@@ -79,56 +79,56 @@ public class VideoCapture: NSObject { ...@@ -79,56 +79,56 @@ public class VideoCapture: NSObject {
print("Error: no video devices available") print("Error: no video devices available")
return false return false
} }
guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else {
print("Error: could not create AVCaptureDeviceInput") print("Error: could not create AVCaptureDeviceInput")
return false return false
} }
if captureSession.canAddInput(videoInput) { if captureSession.canAddInput(videoInput) {
captureSession.addInput(videoInput) captureSession.addInput(videoInput)
} }
let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect
previewLayer.connection?.videoOrientation = self.videoOrientation previewLayer.connection?.videoOrientation = self.videoOrientation
self.previewLayer = previewLayer self.previewLayer = previewLayer
let settings: [String : Any] = [ let settings: [String : Any] = [
kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)
] ]
videoOutput.videoSettings = settings videoOutput.videoSettings = settings
videoOutput.alwaysDiscardsLateVideoFrames = true videoOutput.alwaysDiscardsLateVideoFrames = true
videoOutput.setSampleBufferDelegate(self, queue: queue) videoOutput.setSampleBufferDelegate(self, queue: queue)
if captureSession.canAddOutput(videoOutput) { if captureSession.canAddOutput(videoOutput) {
captureSession.addOutput(videoOutput) captureSession.addOutput(videoOutput)
} }
// We want the buffers to be in portrait orientation otherwise they are // We want the buffers to be in portrait orientation otherwise they are
// rotated by 90 degrees. Need to set this _after_ addOutput()! // rotated by 90 degrees. Need to set this _after_ addOutput()!
videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation
if captureSession.canAddOutput(photoOutput) { if captureSession.canAddOutput(photoOutput) {
captureSession.addOutput(photoOutput) captureSession.addOutput(photoOutput)
} }
captureSession.commitConfiguration() captureSession.commitConfiguration()
return true return true
} }
/// Starts the capture session; does nothing when it is already running.
public func start() {
    guard !captureSession.isRunning else {
        return
    }
    captureSession.startRunning()
}
/// Stops the capture session; does nothing when it is not running.
public func stop() {
    guard captureSession.isRunning else {
        return
    }
    captureSession.stopRunning()
}
/* Captures a single frame of the camera input. */ /* Captures a single frame of the camera input. */
public func capturePhoto() { public func capturePhoto() {
let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)]) let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)])
...@@ -139,7 +139,7 @@ public class VideoCapture: NSObject { ...@@ -139,7 +139,7 @@ public class VideoCapture: NSObject {
] ]
photoOutput.capturePhoto(with: settings, delegate: self) photoOutput.capturePhoto(with: settings, delegate: self)
} }
func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? { func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? {
if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
let width = CVPixelBufferGetWidth(imageBuffer) let width = CVPixelBufferGetWidth(imageBuffer)
...@@ -152,7 +152,7 @@ public class VideoCapture: NSObject { ...@@ -152,7 +152,7 @@ public class VideoCapture: NSObject {
} }
return nil return nil
} }
func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? { func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? {
if let sampleBuffer = sampleBuffer, if let sampleBuffer = sampleBuffer,
let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
...@@ -172,47 +172,47 @@ public class VideoCapture: NSObject { ...@@ -172,47 +172,47 @@ public class VideoCapture: NSObject {
@available(iOS 10.0, *)
extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {

    /// Forwards a captured frame to the delegate, throttled to `fps`.
    ///
    /// The camera runs at full speed (lowering its frame rate looks ugly in
    /// the preview); frames arriving sooner than `1 / fps` seconds after the
    /// last delivered one are ignored. An `fps` of -1 disables throttling.
    public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
        let sinceLastDelivery = timestamp - lastTimestamp

        guard fps == -1 || sinceLastDelivery >= CMTimeMake(1, Int32(fps)) else {
            return
        }
        lastTimestamp = timestamp

        delegate?.videoCapture?(self, didCaptureSampleBuffer: sampleBuffer, timestamp: timestamp)

        // Only pay for the texture conversion when the delegate implements
        // the texture callback.
        let textureCallback = #selector(VideoCaptureDelegate.videoCapture(_:didCaptureVideoTexture:timestamp:))
        let wantsTexture = delegate?.responds(to: textureCallback) ?? false
        if wantsTexture {
            let texture = convertToMTLTexture(sampleBuffer: sampleBuffer)
            delegate?.videoCapture?(self, didCaptureVideoTexture: texture, timestamp: timestamp)
        }
    }

    /// Logs that the capture output dropped a frame.
    public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        print("dropped frame")
    }
}
@available(iOS 10.0, *)
extension VideoCapture: AVCapturePhotoCaptureDelegate {
    /// Delivers a finished still photo to the delegate.
    ///
    /// Nothing is delivered when `error` is set. The full photo is converted
    /// to a Metal texture and the preview to a `UIImage`, each only when the
    /// delegate implements the matching optional callback.
    public func photoOutput(_ captureOutput: AVCapturePhotoOutput,
                            didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?,
                            previewPhoto previewPhotoSampleBuffer: CMSampleBuffer?,
                            resolvedSettings: AVCaptureResolvedPhotoSettings,
                            bracketSettings: AVCaptureBracketedStillImageSettings?,
                            error: Error?) {
        guard error == nil else {
            return
        }

        let textureSelector = #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhotoTexture:))
        if delegate?.responds(to: textureSelector) ?? false {
            let photoTexture: MTLTexture? = convertToMTLTexture(sampleBuffer: photoSampleBuffer)
            delegate?.videoCapture?(self, didCapturePhotoTexture: photoTexture)
        }

        let photoSelector = #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhoto:))
        if delegate?.responds(to: photoSelector) ?? false {
            let preview: UIImage? = convertToUIImage(sampleBuffer: previewPhotoSampleBuffer)
            delegate?.videoCapture?(self, didCapturePhoto: preview)
        }
    }
}
...@@ -19,265 +19,242 @@ import paddle_mobile ...@@ -19,265 +19,242 @@ import paddle_mobile
import MetalPerformanceShaders import MetalPerformanceShaders
/// Reads a whole parameter file into memory through the C stdio API.
class FileReader {
    /// Open stdio handle; closed in `deinit`.
    let file: UnsafeMutablePointer<FILE>
    /// Size of the file in bytes, measured once at open time.
    let fileSize: Int

    /// Opens `paramPath` for binary reading and records its size.
    ///
    /// - Parameter paramPath: Path of the file to read.
    /// - Throws: `PaddleMobileError.loaderError` when the file cannot be
    ///   opened or its reported size is not positive.
    init(paramPath: String) throws {
        guard let tmpFile = fopen(paramPath, "rb") else {
            throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
        }
        file = tmpFile
        fseek(file, 0, SEEK_END)
        fileSize = ftell(file)
        guard fileSize > 0 else {
            throw PaddleMobileError.loaderError(message: "param file size is too small")
        }
        rewind(file)
    }

    /// Reads the complete file into a freshly allocated buffer of `T`.
    ///
    /// The buffer holds `fileSize` bytes rounded up to a whole number of `T`
    /// elements. Bytes that could not be read (short read or rounding tail)
    /// are zero-filled, so the buffer never exposes uninitialised memory.
    ///
    /// - Returns: A pointer the caller owns and must `deallocate()`.
    func read<T>() -> UnsafeMutablePointer<T> {
        // `allocate(capacity:)` counts elements of T, not bytes.
        let stride = MemoryLayout<T>.stride
        let elementCount = (fileSize + stride - 1) / stride
        let ptr = UnsafeMutablePointer<T>.allocate(capacity: elementCount)

        // Always start from the beginning so repeated reads return the same data.
        rewind(file)
        let bytesRead = fread(ptr, 1, fileSize, file)

        let allocatedBytes = elementCount * stride
        if bytesRead < allocatedBytes {
            memset(UnsafeMutableRawPointer(ptr) + bytesRead, 0, allocatedBytes - bytesRead)
        }
        return ptr
    }

    deinit {
        fclose(file)
    }
}
/// Compute back ends the demo can run a model on.
enum Platform {
    case GPU
}

/// Selectable platforms, each paired with the title shown for it in the picker.
let platformSupport: [(Platform, String)] = [
    (Platform.GPU, "GPU")
]
/// Models the demo can load; the raw value is the title shown in the picker.
enum SupportModel: String {
    // Cases without an explicit raw value use their own name as the raw value.
    case yolo
    case mobilenet_combined
    case super_resolution = "superresoltion"
    case mobilenet

    /// All selectable models, in the order they appear in the picker.
    static func supportedModels() -> [SupportModel] {
        let ordered: [SupportModel] = [
            .super_resolution,
            .yolo,
            .mobilenet_combined,
            .mobilenet
        ]
        return ordered
    }
}
/// One network instance per supported model, all created on the shared Metal device.
let netSupport: [SupportModel : Net] = [
    SupportModel.super_resolution : SuperResolutionNet(device: MetalHelper.shared.device),
    SupportModel.yolo : YoloNet(device: MetalHelper.shared.device),
    SupportModel.mobilenet_combined : MobileNetCombined(device: MetalHelper.shared.device),
    SupportModel.mobilenet : MobileNet(device: MetalHelper.shared.device)
]
/// Demo screen: picks a model, turns the selected image into a Metal texture
/// and runs GPU prediction on it.
class ViewController: UIViewController {
    @IBOutlet weak var resultTextView: UITextView!
    @IBOutlet weak var selectImageView: UIImageView!
    @IBOutlet weak var elapsedTimeLabel: UILabel!
    @IBOutlet weak var modelPickerView: UIPickerView!
    @IBOutlet weak var threadPickerView: UIPickerView!
    @IBOutlet weak var videoView: UIView!
    //  var videoCapture: VideoCapture!

    var selectImage: UIImage?
    var inputPointer: UnsafeMutablePointer<Float32>?
    var modelType: SupportModel = SupportModel.supportedModels()[0]
    var toPredictTexture: MTLTexture?
    /// Created by `loadAct`; nil until the user has loaded a model.
    var runner: Runner!
    var platform: Platform = .GPU
    var threadNum = 1

    /// Creates a runner for the selected model, prepares the input texture if
    /// there is none yet, and loads the model.
    @IBAction func loadAct(_ sender: Any) {
        runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue)

        switch platform {
        case .GPU:
            //            let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil)
            //            let fileReader = try! FileReader.init(paramPath: filePath!)
            //            let pointer: UnsafeMutablePointer<Float32> = fileReader.read()
            //
            //            let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared)
            //
            //            buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize)

            if toPredictTexture == nil {
                //                runner.getTexture(inBuffer: buffer!) { [weak self] (texture) in
                //                    self?.toPredictTexture = texture
                //                }
                if let cgImage = selectImage?.cgImage {
                    runner.getTexture(image: cgImage) { [weak self] (texture) in
                        self?.toPredictTexture = texture
                    }
                } else {
                    // No usable image yet (e.g. the bundled test image is missing):
                    // still load the model; predictAct reports the missing input.
                    print("请添加测试图片")
                }
            }
        }

        if runner.load() {
            print(" load success ! ")
        } else {
            print(" load error ! ")
        }
    }

    /// Presents an image picker, using the camera when the device has one.
    @IBAction func selectImageAct(_ sender: Any) {
        let imagePicker = UIImagePickerController()
        // Requesting an unavailable source type raises an exception (e.g. on
        // the simulator), so fall back to the photo library.
        imagePicker.sourceType = UIImagePickerController.isSourceTypeAvailable(.camera) ? .camera : .photoLibrary
        imagePicker.delegate = self
        self.present(imagePicker, animated: true, completion: nil)
    }

    /// Clears the current runner; a no-op when no model has been loaded yet.
    @IBAction func clearAct(_ sender: Any) {
        runner?.clear()
    }

    /// Runs prediction on the prepared texture and shows the result together
    /// with the average time per run.
    @IBAction func predictAct(_ sender: Any) {
        let repeatCount = 1
        switch platform {
        case .GPU:
            guard let runner = runner else {
                print(" load error ! ")
                return
            }
            guard let inTexture = toPredictTexture else {
                resultTextView.text = "请选择图片 ! "
                return
            }

            let startDate = Date.init()
            for i in 0..<repeatCount {
                runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
                    // Report only the final run, and only while the screen is alive.
                    if let sSelf = self, success, i == repeatCount - 1,
                        let holders = resultHolder, let firstHolder = holders.first {
                        let time = Date.init().timeIntervalSince(startDate)

                        print(firstHolder.result.floatArr(count: firstHolder.capacity).strideArray())
                        DispatchQueue.main.async {
                            sSelf.resultTextView.text = runner.net.resultStr(res: holders)
                            sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(repeatCount) * 1000.0) ms"
                        }
                    }

                    // Queued after the UI update above, so the result is still
                    // valid when it is rendered; runs even if the screen is gone.
                    DispatchQueue.main.async {
                        resultHolder?.first?.releasePointer()
                    }
                }
            }
        }
    }

    override func viewDidLoad() {
        super.viewDidLoad()

        GlobalConfig.shared.computePrecision = .Float16
        GlobalConfig.shared.debug = false

        modelPickerView.delegate = self
        modelPickerView.dataSource = self
        threadPickerView.delegate = self
        threadPickerView.dataSource = self

        if let image = UIImage.init(named: "00001.jpg") {
            selectImage = image
            selectImageView.image = image
        } else {
            print("请添加测试图片")
        }
    }
}
// MARK: - Model / platform pickers
extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate {
    /// Both pickers show a single column; any other picker is a programming error.
    func numberOfComponents(in pickerView: UIPickerView) -> Int {
        guard pickerView == modelPickerView || pickerView == threadPickerView else {
            fatalError()
        }
        return 1
    }

    /// Row count: number of supported models or number of supported platforms.
    func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
        if pickerView == modelPickerView {
            return SupportModel.supportedModels().count
        }
        if pickerView == threadPickerView {
            return platformSupport.count
        }
        fatalError()
    }

    /// Row title: the model's raw value or the platform's display name.
    public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
        if pickerView == modelPickerView {
            let model = SupportModel.supportedModels()[row]
            return model.rawValue
        }
        if pickerView == threadPickerView {
            let (_, title) = platformSupport[row]
            return title
        }
        fatalError()
    }

    /// Stores the chosen model or platform on the view controller.
    public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
        if pickerView == modelPickerView {
            modelType = SupportModel.supportedModels()[row]
            return
        }
        if pickerView == threadPickerView {
            platform = platformSupport[row].0
            return
        }
        fatalError()
    }
}
// MARK: - Image picking
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
    /// Stores the picked image, shows it, and converts it to the texture used
    /// for prediction.
    ///
    /// When no model has been loaded yet (`runner` is nil) or the image has no
    /// `CGImage` backing, the stale texture is dropped instead of crashing.
    func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
        picker.dismiss(animated: true) { [weak self] in
            guard let sSelf = self else {
                return
            }
            guard let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else {
                print("no image")
                return
            }
            sSelf.selectImage = image
            sSelf.selectImageView.image = image

            guard let runner = sSelf.runner, let cgImage = image.cgImage else {
                // Never predict on a texture that belongs to a previous image.
                sSelf.toPredictTexture = nil
                return
            }
            runner.getTexture(image: cgImage, getTexture: { [weak sSelf] (texture) in
                sSelf?.toPredictTexture = texture
            })
        }
    }
}
var bool1 = false var bool1 = false
// MARK: - VideoCaptureDelegate
extension ViewController: VideoCaptureDelegate {
    /// Scales `texture` with the runner, predicts on the scaled texture and
    /// releases the first result buffer without using it.
    func predictTexture(texture: MTLTexture) {
        runner.scaleTexture(input: texture) { scaled in
            self.runner.predict(texture: scaled, completion: { (success, resultHolder) in
                //        print(resultHolder!.result![0])
                resultHolder?.first?.releasePointer()
            })
        }
    }
}
......
...@@ -326,9 +326,10 @@ ...@@ -326,9 +326,10 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO; ALWAYS_SEARCH_USER_PATHS = NO;
IPHONEOS_DEPLOYMENT_TARGET = 12.1; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES; MTL_FAST_MATH = YES;
MTL_LANGUAGE_REVISION = Metal12;
SDKROOT = iphoneos; SDKROOT = iphoneos;
}; };
name = Debug; name = Debug;
...@@ -337,9 +338,10 @@ ...@@ -337,9 +338,10 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO; ALWAYS_SEARCH_USER_PATHS = NO;
IPHONEOS_DEPLOYMENT_TARGET = 12.1; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
MTL_ENABLE_DEBUG_INFO = NO; MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES; MTL_FAST_MATH = YES;
MTL_LANGUAGE_REVISION = Metal12;
SDKROOT = iphoneos; SDKROOT = iphoneos;
}; };
name = Release; name = Release;
......
...@@ -20,23 +20,23 @@ kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0 ...@@ -20,23 +20,23 @@ kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0
const device float4 * nscale [[buffer(0)]], const device float4 * nscale [[buffer(0)]],
const device float4 * nbias [[buffer(1)]], const device float4 * nbias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return; gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z); const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * nscale[gid.z] + nbias[gid.z]; float4 output = input * nscale[gid.z] + nbias[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
// Half-precision batch norm: out = in * newScale[slice] + newBias[slice],
// with scale and bias indexed by the texture array slice (gid.z).
kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
                           texture2d_array<half, access::write> outTexture [[texture(1)]],
                           const device half4 * newScale [[buffer(0)]],
                           const device half4 * newBias [[buffer(1)]],
                           uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output texture have nothing to do.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const half4 pixel = inTexture.read(gid.xy, gid.z);
    half4 normalized = pixel * newScale[gid.z] + newBias[gid.z];
    outTexture.write(normalized, gid.xy, gid.z);
}
...@@ -15,10 +15,10 @@ struct MetalConvParam { ...@@ -15,10 +15,10 @@ struct MetalConvParam {
}; };
kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]], kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]], texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *new_scale [[buffer(0)]], const device float4 *new_scale [[buffer(0)]],
const device float4 *new_biase [[buffer(1)]], const device float4 *new_biase [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() || gid.y >= outTexture.get_height() ||
...@@ -32,5 +32,5 @@ kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture ...@@ -32,5 +32,5 @@ kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture
input = inTexture.sample(sample, gid.x, gid.y, gid.z); input = inTexture.sample(sample, gid.x, gid.y, gid.z);
output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0); output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -21,29 +21,29 @@ ...@@ -21,29 +21,29 @@
#define VECTOR(p, n) CONCAT2(p, n) #define VECTOR(p, n) CONCAT2(p, n)
// Bilinear resize of `input` into `output`, one thread per output texel and slice.
// pm.ratio_w / pm.ratio_h map an output coordinate to a source coordinate;
// when both textures have the same size the texel is copied unchanged.
kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[texture(0)]],
                                     texture2d_array<P, access::write> output [[texture(1)]],
                                     constant bilinear_interp_param & pm [[buffer(0)]],
                                     uint3 gid [[thread_position_in_grid]]) {
    // The dispatch grid may be rounded up past the output size; skip those
    // threads, matching the guard used by the other kernels in this project.
    if (gid.x >= output.get_width() ||
        gid.y >= output.get_height() ||
        gid.z >= output.get_array_size()) return;

    VECTOR(P, 4) r;
    if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
        r = input.read(gid.xy, gid.z);
    } else {
        // Source position of this output texel.
        P w = gid.x * pm.ratio_w;
        P h = gid.y * pm.ratio_h;
        // Top-left neighbour and its right/bottom neighbours.
        uint w0 = w, h0 = h;
        uint w1 = w0 + 1, h1 = h0 + 1;
        // Interpolation weights: distance from the top-left neighbour and its complement.
        P w1lambda = w - w0, h1lambda = h - h0;
        P w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
        // Clamp the far neighbours at the right/bottom edge of the input.
        if (w1 >= input.get_width()) w1 = w0;
        if (h1 >= input.get_height()) h1 = h0;
        VECTOR(P, 4) r0 = input.read(uint2(w0, h0), gid.z);
        VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
        VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
        VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
        r = h2lambda * (w2lambda * r0 + w1lambda * r1)
        + h1lambda * (w2lambda * r2 + w1lambda * r3);
    }
    output.write(r, gid.xy, gid.z);
}
#endif #endif
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
using namespace metal; using namespace metal;
// Scale factors the bilinear_interp kernel multiplies output coordinates by
// to obtain source coordinates. Field order is part of the buffer layout.
struct bilinear_interp_param {
    float ratio_h, ratio_w;
};
#define P float #define P float
......
...@@ -20,35 +20,35 @@ ...@@ -20,35 +20,35 @@
#define FUNC(f, p) CONCAT2_(f, p) #define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n) #define VECTOR(p, n) CONCAT2(p, n)
// Decodes a box: combines a prior box (corner form x0, y0, x1, y1), its
// variances and a target offset into an output box in corner form.
// The prior and variance are read at texel (0, gid.x); the four target
// components come from the first channel of texels (0..3, gid.x).
kernel void FUNC(boxcoder, P)(texture2d_array<P, access::read> priorBox [[texture(0)]],
                              texture2d_array<P, access::read> priorBoxVar [[texture(1)]],
                              texture2d_array<P, access::read> targetBox [[texture(2)]],
                              texture2d_array<P, access::write> output[[texture(3)]],
                              uint3 gid [[thread_position_in_grid]]) {
    const uint row = gid.x;
    const uint slice = gid.z;

    VECTOR(P, 4) prior = priorBox.read(uint2(0, row), slice);
    VECTOR(P, 4) variance = priorBoxVar.read(uint2(0, row), slice);

    VECTOR(P, 4) target;
    for (uint c = 0; c < 4; c++) {
        target[c] = targetBox.read(uint2(c, row), slice)[0];
    }

    // Prior box as centre + size.
    P priorCenterX = (prior.x + prior.z) / 2;
    P priorCenterY = (prior.y + prior.w) / 2;
    P priorWidth = prior.z - prior.x;
    P priorHeight = prior.w - prior.y;

    // Decoded box as centre + size.
    P centerX = variance.x * target.x * priorWidth + priorCenterX;
    P centerY = variance.y * target.y * priorHeight + priorCenterY;
    P width = exp(variance.z * target.z) * priorWidth;
    P height = exp(variance.w * target.w) * priorHeight;

    // Back to corner form.
    VECTOR(P, 4) box;
    box.x = centerX - width / 2;
    box.y = centerY - height / 2;
    box.z = centerX + width / 2;
    box.w = centerY + height / 2;

    output.write(box, gid.xy, slice);
}
#endif #endif
...@@ -13,24 +13,24 @@ kernel void buffer_to_texture_kernel( ...@@ -13,24 +13,24 @@ kernel void buffer_to_texture_kernel(
const device float *input [[buffer(0)]], const device float *input [[buffer(0)]],
texture2d<float, access::write> outTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){ uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) { gid.y >= outTexture.get_height()) {
return; return;
} }
float y = input[outTexture.get_width() * gid.y + gid.x]; float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid); outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid);
} }
// Copies a row-major float buffer into the first channel of a half texture;
// the other three channels are written as zero.
kernel void buffer_to_texture_kernel_half(const device float *input [[buffer(0)]],
                                          texture2d<half, access::write> outTexture [[texture(0)]],
                                          uint2 gid [[thread_position_in_grid]]){
    const bool insideTexture = gid.x < outTexture.get_width() &&
                               gid.y < outTexture.get_height();
    if (!insideTexture) {
        return;
    }

    // Row-major index of this texel in the source buffer.
    const uint flatIndex = outTexture.get_width() * gid.y + gid.x;
    float value = input[flatIndex];
    outTexture.write(half4(value, 0.0f, 0.0f, 0.0f), gid);
}
...@@ -17,104 +17,104 @@ using namespace metal; ...@@ -17,104 +17,104 @@ using namespace metal;
// Rank-1 mapping from texture position (x, y, slice, lane) to a 4-D index:
// only the last axis is used, as x * 4 + lane.
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
    abcd[0] = 0;
    abcd[1] = 0;
    abcd[2] = 0;
    abcd[3] = 4 * xyzn[0] + xyzn[3];
}
// Rank-2 mapping from texture position to a 4-D index:
// y becomes axis 2 and x * 4 + lane becomes axis 3.
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
    abcd[0] = 0;
    abcd[1] = 0;
    abcd[2] = xyzn[1];
    abcd[3] = 4 * xyzn[0] + xyzn[3];
}
// Rank-3 mapping from texture position to a 4-D index:
// x -> axis 3, y -> axis 2, slice * 4 + lane -> axis 1.
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
    abcd[0] = 0;
    abcd[3] = xyzn[0];
    abcd[2] = xyzn[1];
    abcd[1] = 4 * xyzn[2] + xyzn[3];
}
// Rank-4 mapping from texture position to a 4-D index. The flattened
// channel (slice * 4 + lane) is split by C into axis 0 and axis 3.
inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
    abcd[2] = xyzn[0];
    abcd[1] = xyzn[1];
    uint channel = 4 * xyzn[2] + xyzn[3];
    abcd[0] = channel / C;
    abcd[3] = channel % C;
}
// Rank-1 inverse of xyzn2abcd_1: axis 3 of the 4-D index is split into
// texel x (index / 4) and lane (index % 4); y and slice are zero.
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
    xyzn[1] = xyzn[2] = 0;
    xyzn[0] = abcd[3] / 4;
    // The remainder is the lane (xyzn[3]), mirroring abcd2xyzn_2; writing it
    // to xyzn[1] would overwrite y and leave the lane unset.
    xyzn[3] = abcd[3] % 4;
}
// Rank-2 mapping from a 4-D index to texture position:
// axis 2 -> y, axis 3 -> x (index / 4) and lane (index % 4); slice is zero.
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
    xyzn[2] = 0;
    xyzn[1] = abcd[2];
    const int flat = abcd[3];
    xyzn[0] = flat / 4;
    xyzn[3] = flat % 4;
}
// Rank-3 mapping from a 4-D index to texture position:
// axis 3 -> x, axis 2 -> y, axis 1 -> slice (index / 4) and lane (index % 4).
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
    xyzn[0] = abcd[3];
    xyzn[1] = abcd[2];
    const int channel = abcd[1];
    xyzn[2] = channel / 4;
    xyzn[3] = channel % 4;
}
// Rank-4 mapping from a 4-D index to texture position. Axes 0 and 3 are
// flattened as a * C + d, then split into slice (/ 4) and lane (% 4).
inline void abcd2xyzn_4(int C, int abcd[4], int xyzn[4]) {
    xyzn[0] = abcd[2];
    xyzn[1] = abcd[1];
    uint channel = abcd[0] * C + abcd[3];
    xyzn[2] = channel / 4;
    xyzn[3] = channel % 4;
}
// General (rank-4) mapping from texture position to a 4-D index; performs
// the same computation as xyzn2abcd_4.
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
    abcd[2] = xyzn[0];
    abcd[1] = xyzn[1];
    uint channel = 4 * xyzn[2] + xyzn[3];
    abcd[0] = channel / C;
    abcd[3] = channel % C;
}
// General (rank-4) mapping from a 4-D index to texture position; performs
// the same computation as abcd2xyzn_4.
inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
    xyzn[0] = abcd[2];
    xyzn[1] = abcd[1];
    uint channel = abcd[0] * C + abcd[3];
    xyzn[2] = channel / 4;
    xyzn[3] = channel % 4;
}
// Flattens a 4-D index into a row-major linear offset for dimensions `dim`
// (dim[0] is not needed for the computation).
inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
    int32_t offset = abcd[0];
    for (int axis = 1; axis < 4; axis++) {
        offset = offset * dim[axis] + abcd[axis];
    }
    return offset;
}
// Expands a row-major linear offset into a 4-D index for dimensions `dim`,
// peeling axes from the innermost (3) outwards; axis 0 keeps the remaining quotient.
inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
    for (int axis = 3; axis > 0; axis--) {
        abcd[axis] = ind % dim[axis];
        ind /= dim[axis];
    }
    abcd[0] = ind;
}
// Applies the axis permutation `trans`: output axis i takes input axis trans[i].
inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
    int axis = 0;
    while (axis < 4) {
        const int32_t source = trans[axis];
        opos[axis] = ipos[source];
        axis++;
    }
}
// Applies the inverse of the axis permutation `trans`: input axis i is
// written to output axis trans[i].
inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
    int axis = 0;
    while (axis < 4) {
        const int32_t target = trans[axis];
        opos[target] = ipos[axis];
        axis++;
    }
}
// Convolution parameters shared with the host side. Field order and types
// are part of the buffer layout and must not change.
struct MetalConvParam {
    short offsetX, offsetY, offsetZ;   // signed offsets per axis
    ushort strideX, strideY;           // stride per axis
    ushort dilationX, dilationY;       // dilation per axis
};
...@@ -42,73 +42,73 @@ ...@@ -42,73 +42,73 @@
// uint3 gid [[thread_position_in_grid]]) { // uint3 gid [[thread_position_in_grid]]) {
//} //}
// Generic concat kernel (V == VNORMAL variant).
//
// For each of the 4 components of the output texel at `gid`, the kernel
//   1. converts the texture position to a logical `abcd` position,
//   2. subtracts pm.offset along pm.axis and walks pm.vdim[] to find which of
//      the N inputs covers that position,
//   3. converts the input-relative logical position back to a texture position
//      and fetches the single component from that input.
// Components that are not covered by any input keep the value read from `inx`.
//
// in0..in5: the N input textures (only the first N are declared)
// inx:      texture providing the initial value of every output texel
// out:      destination texture
// pm:       concat parameters (output dims, axis, offset, per-input extents)
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
                                      texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
                                      texture2d_array<P, access::read> in2 [[texture(2)]],
#endif
#if N >= 4
                                      texture2d_array<P, access::read> in3 [[texture(3)]],
#endif
#if N >= 5
                                      texture2d_array<P, access::read> in4 [[texture(4)]],
#endif
#if N >= 6
                                      texture2d_array<P, access::read> in5 [[texture(5)]],
#endif
                                      texture2d_array<P, access::read> inx [[texture(N)]],
                                      texture2d_array<P, access::write> out [[texture(N+1)]],
                                      constant ConcatParam & pm [[buffer(0)]],
                                      uint3 gid [[thread_position_in_grid]]) {
    // Thread-local copy: odim is patched temporarily below and pm is in the
    // constant address space.
    ConcatParam cp = pm;

    int outPos[4] = {int(gid.x), int(gid.y), int(gid.z), 0};
    int logical[4];
    int srcPos[4];

    VECTOR(P, 4) r = inx.read(gid.xy, gid.z);

    for (int lane = 0; lane < 4; lane++) {
        outPos[3] = lane;
#if R == 4
        xyzn2abcd_4(cp.odim[3], outPos, logical);
#else
        FUNC_R(xyzn2abcd, R)(outPos, logical);
#endif

        // Position along the concat axis, relative to the start of input 0.
        int rel = logical[cp.axis] - cp.offset;
        if (rel < 0) continue;

        // Find the input whose extent contains `rel`; afterwards `rel` is the
        // position inside that input.
        int src = 0;
        while (src < N && rel >= cp.vdim[src]) {
            rel -= cp.vdim[src];
            src++;
        }
        if (src == N) continue;

        // Temporarily swap in the selected input's extent along the axis so the
        // abcd -> xyzn conversion sees that input's dimensions.
        const int savedDim = cp.odim[cp.axis];
        logical[cp.axis] = rel;
        cp.odim[cp.axis] = cp.vdim[src];
#if R == 4
        abcd2xyzn_4(cp.odim[3], logical, srcPos);
#else
        FUNC_R(abcd2xyzn, R)(logical, srcPos);
#endif
        cp.odim[cp.axis] = savedDim;

        const uint2 srcXY = uint2(srcPos[0], srcPos[1]);
        switch (src) {
            case 0: r[lane] = in0.read(srcXY, srcPos[2])[srcPos[3]]; break;
            case 1: r[lane] = in1.read(srcXY, srcPos[2])[srcPos[3]]; break;
#if N >= 3
            case 2: r[lane] = in2.read(srcXY, srcPos[2])[srcPos[3]]; break;
#endif
#if N >= 4
            case 3: r[lane] = in3.read(srcXY, srcPos[2])[srcPos[3]]; break;
#endif
#if N >= 5
            case 4: r[lane] = in4.read(srcXY, srcPos[2])[srcPos[3]]; break;
#endif
#if N >= 6
            case 5: r[lane] = in5.read(srcXY, srcPos[2])[srcPos[3]]; break;
#endif
        }
    }
    out.write(r, gid.xy, gid.z);
}
#endif // V == NORMAL #endif // V == NORMAL
...@@ -117,66 +117,66 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex ...@@ -117,66 +117,66 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
#if V == VX #if V == VX
// Concat kernel specialised for concatenation along the texture X axis (V == VX).
//
// Each thread copies one whole texel: it subtracts pm.offset from gid.x, walks
// pm.vdim[] to find which input covers that X position, and copies the texel at
// the input-relative X from that input to `out` at `gid`.
// Threads whose X lies before the offset, or past the last input, write nothing.
//
// in0..in5: the N input textures (only the first N are declared)
// out:      destination texture
// pm:       concat parameters; only `offset` and `vdim` are read here
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
                                      texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
                                      texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
                                      texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
                                      texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
                                      texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
                                      texture2d_array<P, access::write> out [[texture(N)]],
                                      constant ConcatParam & pm [[buffer(0)]],
                                      uint3 gid [[thread_position_in_grid]]) {
    // X position relative to the start of input 0.
    int x = gid.x - pm.offset;
    if (x < 0) return;
    if (x < pm.vdim[0]) {
        // Read at the offset-adjusted X, like every other input below.
        // (Reading at gid.x is only correct when pm.offset == 0.)
        VECTOR(P, 4) r = in0.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
    x -= pm.vdim[0];
    if (x < pm.vdim[1]) {
        VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#if N >= 3
    x -= pm.vdim[1];
    if (x < pm.vdim[2]) {
        VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 3
#if N >= 4
    x -= pm.vdim[2];
    if (x < pm.vdim[3]) {
        VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 4
#if N >= 5
    x -= pm.vdim[3];
    if (x < pm.vdim[4]) {
        VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 5
#if N >= 6
    x -= pm.vdim[4];
    if (x < pm.vdim[5]) {
        VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 6
}
#endif // V == VX #endif // V == VX
...@@ -199,50 +199,50 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex ...@@ -199,50 +199,50 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
texture2d_array<P, access::write> out [[texture(N)]], texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]], constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
int y = gid.y - pm.offset; int y = gid.y - pm.offset;
if (y < 0) return; if (y < 0) return;
if (y < pm.vdim[0]) { if (y < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z); VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
y -= pm.vdim[0]; y -= pm.vdim[0];
if (y < pm.vdim[1]) { if (y < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z); VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#if N >= 3 #if N >= 3
y -= pm.vdim[1]; y -= pm.vdim[1];
if (y < pm.vdim[2]) { if (y < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z); VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 3 #endif // N >= 3
#if N >= 4 #if N >= 4
y -= pm.vdim[2]; y -= pm.vdim[2];
if (y < pm.vdim[3]) { if (y < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z); VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 4 #endif // N >= 4
#if N >= 5 #if N >= 5
y -= pm.vdim[3]; y -= pm.vdim[3];
if (y < pm.vdim[4]) { if (y < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z); VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 5 #endif // N >= 5
#if N >= 6 #if N >= 6
y -= pm.vdim[4]; y -= pm.vdim[4];
if (y < pm.vdim[5]) { if (y < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z); VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 6 #endif // N >= 6
} }
#endif // V == VY #endif // V == VY
...@@ -265,50 +265,50 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex ...@@ -265,50 +265,50 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
texture2d_array<P, access::write> out [[texture(N)]], texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]], constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
int z = gid.z - pm.offset; int z = gid.z - pm.offset;
if (z < 0) return; if (z < 0) return;
if (z < pm.vdim[0]) { if (z < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z); VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
z -= pm.vdim[0]; z -= pm.vdim[0];
if (z < pm.vdim[1]) { if (z < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(gid.xy, z); VECTOR(P, 4) r = in1.read(gid.xy, z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#if N >= 3 #if N >= 3
z -= pm.vdim[1]; z -= pm.vdim[1];
if (z < pm.vdim[2]) { if (z < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(gid.xy, z); VECTOR(P, 4) r = in2.read(gid.xy, z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 3 #endif // N >= 3
#if N >= 4 #if N >= 4
z -= pm.vdim[2]; z -= pm.vdim[2];
if (z < pm.vdim[3]) { if (z < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(gid.xy, z); VECTOR(P, 4) r = in3.read(gid.xy, z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 4 #endif // N >= 4
#if N >= 5 #if N >= 5
z -= pm.vdim[3]; z -= pm.vdim[3];
if (z < pm.vdim[4]) { if (z < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(gid.xy, z); VECTOR(P, 4) r = in4.read(gid.xy, z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 5 #endif // N >= 5
#if N >= 6 #if N >= 6
z -= pm.vdim[4]; z -= pm.vdim[4];
if (z < pm.vdim[5]) { if (z < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(gid.xy, z); VECTOR(P, 4) r = in5.read(gid.xy, z);
out.write(r, gid.xy, gid.z); out.write(r, gid.xy, gid.z);
return; return;
} }
#endif // N >= 6 #endif // N >= 6
} }
#endif // V == VZ #endif // V == VZ
......
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
using namespace metal; using namespace metal;
// Parameter block shared by all concat kernel variants.
// Field order and types define the buffer layout (16 x int32_t).
struct ConcatParam {
    // Output dimensions; one entry is temporarily overwritten by the generic kernel.
    int32_t odim[4];
    // axis:   index into the logical position along which inputs are concatenated.
    // offset: value subtracted from the position along the axis before
    //         the inputs are searched.
    int32_t axis, offset;
    // Not read by the kernels visible in this file section.
    int32_t trans[4];
    // Extent of each input along the concat axis (up to 6 inputs).
    int32_t vdim[6];
};
#define VNORMAL 1 #define VNORMAL 1
...@@ -41,129 +41,129 @@ struct ConcatParam { ...@@ -41,129 +41,129 @@ struct ConcatParam {
// Each group below stamps out one concat kernel pair (float and half) by
// defining V (variant), R, N (input count) and P (element type) and then
// including the kernel template. All macros are undefined again afterwards
// so the groups are independent of each other.

// ssd-ar: (R=3, N=5, V=x)
#define V VX
#define R 3
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// ssd-ar: (R=2, N=5, V=x)
#define V VX
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// ssd-ar: (R=4, N=3, V=z)
#define V VZ
#define R 4
#define N 3
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// ssd: (R=2, N=6, V=y)
#define V VY
#define R 2
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// ssd: (R=3, N=6, V=y)
#define V VY
#define R 3
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// (R=4, N=2, V=normal)
#define V VNORMAL
#define R 4
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// (R=2, N=2, V=y)
#define V VY
#define R 2
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V

// (R=2, N=5, V=y)
#define V VY
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
......
...@@ -18,45 +18,45 @@ using namespace metal; ...@@ -18,45 +18,45 @@ using namespace metal;
// Instantiates the ConvAddPrelu kernel template once per (element type, PReLU mode).
// For every inclusion exactly one of PRELU_CHANNEL / PRELU_ELEMENT / PRELU_OTHER
// is defined, and PRELU_TYPE carries the matching name.

// ---- P = float ----
#define P float

// PReLU mode: channel
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL

// PReLU mode: element
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT

// PReLU mode: other
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER

#undef P

// ---- P = half ----
#define P half

// PReLU mode: channel
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL

// PReLU mode: element
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT

// PReLU mode: other
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER

#undef P
......
...@@ -16,17 +16,17 @@ ...@@ -16,17 +16,17 @@
using namespace metal; using namespace metal;
// Parameter block for the transposed-convolution kernels.
// Field order and types define the buffer layout (8 x ushort).
// The 2x2/stride-2 kernels in this file take this struct but do not read it.
struct MetalConvTransposeParam {
    // Kernel width / height.
    ushort kernelW, kernelH;
    // Strides along X / Y.
    ushort strideX, strideY;
    // Padding along X / Y.
    ushort paddingX, paddingY;
    // Dilation factors along X / Y.
    ushort dilationX, dilationY;
};
// Transposed convolution with a 2x2 kernel and stride 2 (float version).
//
// Every output texel (gid.x, gid.y) is produced by exactly one input texel
// (gid.x / 2, gid.y / 2) and one of the four kernel taps, selected by the
// parity of gid.x and gid.y. The four output components are dot products of
// the input texel with four weight rows, accumulated over all input slices.
//
// inTexture:  input texture array (sampled with pixel coordinates, nearest filter)
// outTexture: output texture array; threads outside its bounds do nothing
// param:      unused by this kernel
// weights:    float4 weights; indexed as
//             gid.z * slices * 16 + tap * slices + component * slices * 4 + slice
kernel void conv_transpose2x2_stride2(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                      texture2d_array<float, access::write> outTexture [[texture(1)]],
                                      constant MetalConvTransposeParam &param [[buffer(0)]],
                                      const device float4 *weights [[buffer(1)]],
                                      uint3 gid [[thread_position_in_grid]]) {
    const bool outside = gid.x >= outTexture.get_width() ||
                         gid.y >= outTexture.get_height() ||
                         gid.z >= outTexture.get_array_size();
    if (outside) {
        return;
    }

    const int sliceCount = inTexture.get_array_size();

    // Which of the 2x2 kernel taps produces this output texel.
    const int tapX = gid.x % 2;
    const int tapY = gid.y % 2;
    const int tap = tapY * 2 + tapX;
    const int weightBase = gid.z * sliceCount * 4 * 4 + (tap * sliceCount);

    // Input texel that feeds this output texel.
    const int srcX = gid.x / 2;
    const int srcY = gid.y / 2;

    // Distance (in float4 elements) between the weight rows of two
    // consecutive output components.
    const int rowStride = sliceCount * 4;

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
    float4 output = float4(0.0);
    for (int slice = 0; slice < sliceCount; ++slice) {
        const float4 input = inTexture.sample(sample, float2(srcX, srcY), slice);
        const device float4 *row = weights + weightBase + slice;
        output.x += dot(input, row[0]);
        output.y += dot(input, row[rowStride]);
        output.z += dot(input, row[rowStride * 2]);
        output.w += dot(input, row[rowStride * 3]);
    }

    outTexture.write(output, gid.xy, gid.z);
}
// Transposed convolution with a 2x2 kernel and stride 2 (half version).
//
// Same scheme as the float kernel: each output texel (gid.x, gid.y) comes from
// input texel (gid.x / 2, gid.y / 2) and the kernel tap selected by the parity
// of gid.x and gid.y. Inputs and weights are half, but the dot products and the
// accumulator are float; the result is converted to half only when written.
//
// inTexture:  input texture array (sampled with pixel coordinates, nearest filter)
// outTexture: output texture array; threads outside its bounds do nothing
// param:      unused by this kernel
// weights:    half4 weights; indexed as
//             gid.z * slices * 16 + tap * slices + component * slices * 4 + slice
kernel void conv_transpose2x2_stride2_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                                           texture2d_array<half, access::write> outTexture [[texture(1)]],
                                           constant MetalConvTransposeParam &param [[buffer(0)]],
                                           const device half4 *weights [[buffer(1)]],
                                           uint3 gid [[thread_position_in_grid]]) {
    const bool outside = gid.x >= outTexture.get_width() ||
                         gid.y >= outTexture.get_height() ||
                         gid.z >= outTexture.get_array_size();
    if (outside) {
        return;
    }

    const int sliceCount = inTexture.get_array_size();

    // Which of the 2x2 kernel taps produces this output texel.
    const int tapX = gid.x % 2;
    const int tapY = gid.y % 2;
    const int tap = tapY * 2 + tapX;
    const int weightBase = gid.z * sliceCount * 4 * 4 + (tap * sliceCount);

    // Input texel that feeds this output texel.
    const int srcX = gid.x / 2;
    const int srcY = gid.y / 2;

    // Distance (in half4 elements) between the weight rows of two
    // consecutive output components.
    const int rowStride = sliceCount * 4;

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
    float4 output = float4(0.0);
    for (int slice = 0; slice < sliceCount; ++slice) {
        const half4 input = inTexture.sample(sample, float2(srcX, srcY), slice);
        const device half4 *row = weights + weightBase + slice;
        // Widen to float before the dot product so accumulation is in float.
        const float4 wideInput = float4(input);
        output.x += dot(wideInput, float4(row[0]));
        output.y += dot(wideInput, float4(row[rowStride]));
        output.z += dot(wideInput, float4(row[rowStride * 2]));
        output.w += dot(wideInput, float4(row[rowStride * 3]));
    }

    outTexture.write(half4(output), gid.xy, gid.z);
}
//kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]], //kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
......
...@@ -18,13 +18,13 @@ ...@@ -18,13 +18,13 @@
using namespace metal; using namespace metal;
// Parameter block for the elementwise-add kernels.
// Field order and types define the buffer layout (19 x int32_t).
struct ElementwiseAddParam {
    // fast: when 1, X and Y are read at the same texture position.
    // axis: first logical axis of X that Y is matched against.
    // ylen: number of consecutive axes (starting at `axis`) taken from X's position.
    int32_t fast, axis, ylen;
    // Dimensions of X (only xdim[3] is read by the kernels) and its axis permutation.
    int32_t xdim[4];
    int32_t xtrans[4];
    // Dimensions of Y (only ydim[3] is read by the kernels) and its axis permutation.
    int32_t ydim[4];
    int32_t ytrans[4];
};
// Elementwise add of two texture arrays (float version): out = X + Y.
//
// X is always read at the output position. Y is read at the same position when
// pm.fast == 1; otherwise each of the 4 components is fetched individually from
// the Y position obtained by mapping X's logical position through
// xtrans (inverse), the axis/ylen window, and ytrans.
//
// inputX, inputY: operands
// outTexture:     destination; threads outside its bounds do nothing
// pm:             layout parameters for the non-fast path
kernel void elementwise_add(texture2d_array<float, access::read> inputX [[texture(0)]],
                            texture2d_array<float, access::read> inputY [[texture(1)]],
                            texture2d_array<float, access::write> outTexture [[texture(2)]],
                            constant ElementwiseAddParam &pm [[buffer(0)]],
                            uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    const float4 rx = inputX.read(gid.xy, gid.z);
    float4 ry;

    if (pm.fast != 1) {
        int32_t xPos[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0};
        int32_t xLogical[4];
        int32_t scratch[4];
        // Axes of Y that are not filled from X stay at 0.
        int32_t yLogical[4] = {0, 0, 0, 0};
        int32_t yPos[4];
        // Thread-local copies: the helpers take thread-space arrays, pm is constant.
        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
        // Shifts the [axis, axis + ylen) window of X onto the last ylen axes of Y.
        const int32_t yshift = 4 - pm.ylen - pm.axis;
        const int32_t axisEnd = pm.axis + pm.ylen;

        for (int lane = 0; lane < 4; lane++) {
            xPos[3] = lane;
            xyzn2abcd(pm.xdim[3], xPos, xLogical);
            invtrans(xtrans, xLogical, scratch);
            for (int k = pm.axis; k < axisEnd; k++) {
                yLogical[yshift + k] = scratch[k];
            }
            trans(ytrans, yLogical, scratch);
            abcd2xyzn(pm.ydim[3], scratch, yPos);
            ry[lane] = inputY.read(uint2(yPos[0], yPos[1]), yPos[2])[yPos[3]];
        }
    } else {
        ry = inputY.read(gid.xy, gid.z);
    }

    outTexture.write(rx + ry, gid.xy, gid.z);
}
// Elementwise add of two texture arrays (half version): out = X + Y.
//
// X is always read at the output position. Y is read at the same position when
// pm.fast == 1; otherwise each of the 4 components is fetched individually from
// the Y position obtained by mapping X's logical position through
// xtrans (inverse), the axis/ylen window, and ytrans.
//
// inputX, inputY: operands
// outTexture:     destination; threads outside its bounds do nothing
// pm:             layout parameters for the non-fast path
kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[texture(0)]],
                                 texture2d_array<half, access::read> inputY [[texture(1)]],
                                 texture2d_array<half, access::write> outTexture [[texture(2)]],
                                 constant ElementwiseAddParam &pm [[buffer(0)]],
                                 uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    const half4 rx = inputX.read(gid.xy, gid.z);
    half4 ry;

    if (pm.fast != 1) {
        int32_t xPos[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0};
        int32_t xLogical[4];
        int32_t scratch[4];
        // Axes of Y that are not filled from X stay at 0.
        int32_t yLogical[4] = {0, 0, 0, 0};
        int32_t yPos[4];
        // Thread-local copies: the helpers take thread-space arrays, pm is constant.
        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
        // Shifts the [axis, axis + ylen) window of X onto the last ylen axes of Y.
        const int32_t yshift = 4 - pm.ylen - pm.axis;
        const int32_t axisEnd = pm.axis + pm.ylen;

        for (int lane = 0; lane < 4; lane++) {
            xPos[3] = lane;
            xyzn2abcd(pm.xdim[3], xPos, xLogical);
            invtrans(xtrans, xLogical, scratch);
            for (int k = pm.axis; k < axisEnd; k++) {
                yLogical[yshift + k] = scratch[k];
            }
            trans(ytrans, yLogical, scratch);
            abcd2xyzn(pm.ydim[3], scratch, yPos);
            ry[lane] = inputY.read(uint2(yPos[0], yPos[1]), yPos[2])[yPos[3]];
        }
    } else {
        ry = inputY.read(gid.xy, gid.z);
    }

    outTexture.write(rx + ry, gid.xy, gid.z);
}
...@@ -20,72 +20,72 @@ ...@@ -20,72 +20,72 @@
using namespace metal; using namespace metal;
// Element-wise add kernel with an optional fused PReLU step.
//
// The kernel name is produced by the FUNC3_ macro from `elementwise_add`,
// PRELU_TYPE and the element type P (macro defined outside this view).
//
// Bindings:
//   inputX     [[texture(0)]] - first operand, read-only texture array.
//   inputY     [[texture(1)]] - second operand, read-only texture array.
//   outTexture [[texture(2)]] - destination texture array (write-only).
//   pm         [[buffer(0)]]  - ElementwiseAddParam; this kernel reads fast,
//                               xtrans, ytrans, xdim, ydim, axis and ylen.
//   alpha      [[buffer(1)]]  - PReLU slopes; only present when one of
//                               PRELU_CHANNEL / PRELU_ELEMENT / PRELU_OTHER
//                               is defined.
//   gid                       - thread position; x/y address the texel and
//                               z addresses the array slice.
//
// Each thread writes rx + ry (after the optional PReLU) to outTexture at gid.
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]], kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
texture2d_array<P, access::read> inputY [[texture(1)]], texture2d_array<P, access::read> inputY [[texture(1)]],
texture2d_array<P, access::write> outTexture [[texture(2)]], texture2d_array<P, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]], constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL #ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(1)]], const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif #endif
#ifdef PRELU_ELEMENT #ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(1)]], const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif #endif
#ifdef PRELU_OTHER #ifdef PRELU_OTHER
const device P *alpha [[buffer(1)]], const device P *alpha [[buffer(1)]],
#endif #endif
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
// Threads that fall outside the output texture's width/height/array size do nothing.
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return; gid.z >= outTexture.get_array_size()) return;
VECTOR(P, 4) rx, ry; VECTOR(P, 4) rx, ry;
// Fast path: both operands are read at the same coordinates as the output texel.
if (pm.fast == 1) { if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z); rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z);
} else { } else {
// General path: rx is still read at gid, but ry is gathered one component at a
// time from a coordinate computed per component.
rx = inputX.read(gid.xy, gid.z); rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]}; int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
// yshift relocates the copied index range [axis, axis + ylen) inside y_abcd;
// entries of y_abcd that are not overwritten keep their initial value 0.
int32_t yshift = 4 - pm.ylen - pm.axis; int32_t yshift = 4 - pm.ylen - pm.axis;
// For each of the four components n: x_xyzn -> xyzn2abcd -> invtrans gives t_abcd,
// part of which is copied into y_abcd; trans -> abcd2xyzn then yields the inputY
// texel (y_xyzn[0..2]) and component (y_xyzn[3]) to read.
// NOTE(review): the helpers are defined outside this view; presumably they convert
// between texture coordinates and tensor indices and apply the xtrans/ytrans
// permutations - confirm against their definitions.
for (int n = 0; n < 4; n++) { for (int n = 0; n < 4; n++) {
x_xyzn[3] = n; x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd); xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd); invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) { for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k]; y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
} }
trans(ytrans, y_abcd, t_abcd); }
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn); VECTOR(P, 4) output = rx + ry;
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
VECTOR(P, 4) output = rx + ry;
// PRELU_CHANNEL: one 4-component slope vector per array slice (indexed by gid.z);
// components that are not greater than 0 are multiplied by the matching slope.
#ifdef PRELU_CHANNEL #ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z]; VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif #endif
// PRELU_ELEMENT: one slope vector per output texel and slice, located at
// (gid.y * width + gid.x) * array_size + gid.z in the alpha buffer.
#ifdef PRELU_ELEMENT #ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif #endif
// PRELU_OTHER: a single scalar slope, alpha[0], shared by every component.
#ifdef PRELU_OTHER #ifdef PRELU_OTHER
P alpha_value = alpha[0]; P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x); output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y); output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z); output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w); output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif #endif
// Store the result at the same texel and slice this thread was launched for.
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
#endif #endif
...@@ -31,7 +31,7 @@ using namespace metal; ...@@ -31,7 +31,7 @@ using namespace metal;
// Placeholder kernel with an empty body: it declares a float texture array
// (texture 0), a float output buffer (buffer 0) and the thread position, but
// performs no reads or writes.
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]], kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]], device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
} }
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]], kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册