提交 34a6b532 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1465 from codeWorm2015/develop

format files, improve accuracy
......@@ -10,37 +10,37 @@ import UIKit
/// Application delegate and `@UIApplicationMain` entry point.
///
/// All lifecycle callbacks are empty customization points; only
/// `application(_:didFinishLaunchingWithOptions:)` produces a value.
@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {

    /// Window owned by the application delegate.
    var window: UIWindow?

    /// Override point for customization after application launch.
    ///
    /// - Returns: Always `true`; no custom launch handling is performed.
    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
        return true
    }

    /// Sent when the application is about to move from the active to the
    /// inactive state (incoming call or SMS, or the user quitting the app).
    /// Pause ongoing tasks, disable timers and stop rendering callbacks here.
    func applicationWillResignActive(_ application: UIApplication) {
    }

    /// Release shared resources, save user data, invalidate timers and store
    /// enough state to restore the app if it is terminated later. Called
    /// instead of `applicationWillTerminate(_:)` when background execution is
    /// supported.
    func applicationDidEnterBackground(_ application: UIApplication) {
    }

    /// Called during the transition from the background to the active state;
    /// undo the changes made when entering the background here.
    func applicationWillEnterForeground(_ application: UIApplication) {
    }

    /// Restart any tasks that were paused (or not yet started) while the
    /// application was inactive, and optionally refresh the user interface.
    func applicationDidBecomeActive(_ application: UIApplication) {
    }

    /// Called when the application is about to terminate. Save data if
    /// appropriate; see also `applicationDidEnterBackground(_:)`.
    func applicationWillTerminate(_ application: UIApplication) {
    }
}
......@@ -16,51 +16,51 @@ import Foundation
import paddle_mobile
/// MobileNet classification network configured from resources in the main bundle.
public class MobileNet: Net {

    /// Pre-processing kernel bound to the `mobilenet_preprocess` Metal function
    /// of the default library, declared with a 224 x 224 x 3 output shape.
    class MobilenetPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
            super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
        }
    }

    /// Label table read from a bundled text file.
    ///
    /// Only lines longer than 10 characters are kept, and the first 10
    /// characters of each kept line are dropped (presumably an id prefix in
    /// the synset file -- TODO confirm against the resource).
    class PreWords {
        /// Label strings in file order, with the 10-character prefix removed.
        var contents: [String] = []

        /// - Parameters:
        ///   - fileName: Resource name without extension.
        ///   - type: Resource extension, `"txt"` by default.
        ///   - inBundle: Bundle to search, the main bundle by default.
        ///
        /// Terminates the process when the resource is missing or unreadable.
        init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
            guard let filePath = inBundle.path(forResource: fileName, ofType: type) else {
                fatalError("no file call \(fileName)")
            }
            let text: String
            do {
                text = try String.init(contentsOfFile: filePath)
            } catch {
                // Still fatal, as with the previous `try!`, but now says which file failed.
                fatalError("can not read \(filePath): \(error)")
            }
            contents = text
                .components(separatedBy: CharacterSet.newlines)
                .filter { $0.count > 10 }
                .map { String($0.dropFirst(10)) }
        }

        /// Label stored at `index`; traps when `index` is out of range.
        subscript(index: Int) -> String {
            return contents[index]
        }
    }

    /// Labels loaded from the bundled `synset.txt`.
    let labels = PreWords.init(fileName: "synset")

    /// Formats the first result holder as one line per entry:
    /// `"<rank>: <label> (<score * 100>%)"`, joined with newlines.
    ///
    /// The entries come from the project helper `top(r: 5)` applied to the
    /// `capacity` values read from `result` (presumably the five highest
    /// scores with their indices -- TODO confirm against the helper).
    ///
    /// - Parameter res: Result holders; only the first one is used.
    /// - Returns: The formatted text, or an empty string when `res` is empty.
    override public func resultStr(res: [ResultHolder]) -> String {
        guard let firstRes = res.first else {
            return ""
        }
        let resPointer = firstRes.result
        let lines: [String] = (0..<firstRes.capacity)
            .map { resPointer[$0] }
            .top(r: 5)
            .enumerated()
            .map { (rank, entry) in
                String(format: "%d: %@ (%3.2f%%)", rank + 1, labels[entry.0], entry.1 * 100)
            }
        return lines.joined(separator: "\n")
    }

    /// Configures model/parameter paths, the pre-processing kernel, the input
    /// dimensions and the custom Metal library location for this network.
    override public init(device: MTLDevice) {
        super.init(device: device)
        except = 0
        modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
        preprocessKernel = MobilenetPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 224, 224, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }
}
......@@ -14,13 +14,13 @@ kernel void mobilenet_preprocess(
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilenet_preprocess_half(
......@@ -28,11 +28,11 @@ kernel void mobilenet_preprocess_half(
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
......@@ -10,84 +10,84 @@ import UIKit
import paddle_mobile
/// Demo screen that loads a `MobileNet`, lets the user pick an image and
/// shows the prediction text together with the elapsed time.
class ViewController: UIViewController {
    @IBOutlet weak var resultTextView: UITextView!
    @IBOutlet weak var selectImageView: UIImageView!
    @IBOutlet weak var elapsedTimeLabel: UILabel!

    var net: MobileNet!
    var runner: Runner!
    /// Texture of the currently selected image; `nil` until `getTexture` delivers one.
    var toPredictTexture: MTLTexture?

    /// Creates the network and runner, then requests a texture for the
    /// bundled `banana.jpeg` sample image when it is available.
    override func viewDidLoad() {
        super.viewDidLoad()
        GlobalConfig.shared.computePrecision = .Float16
        net = MobileNet.init(device: MetalHelper.shared.device)
        runner = Runner.init(inNet: net, commandQueue: MetalHelper.shared.queue)

        if let selectImage = UIImage.init(named: "banana.jpeg") {
            selectImageView.image = selectImage
            // Skip texture creation instead of trapping when the image has no CGImage backing.
            if let cgImage = selectImage.cgImage {
                runner.getTexture(image: cgImage) { [weak self] (texture) in
                    self?.toPredictTexture = texture
                }
            }
        }
    }

    /// Loads the network through the runner; a failed load is fatal.
    @IBAction func loadAct(_ sender: Any) {
        if runner.load() {
            let resultText = " load success ! "
            print(resultText)
            resultTextView.text = resultText
        } else {
            fatalError(" load error ")
        }
    }

    /// Presents an image picker, preferring the camera.
    @IBAction func selectImageAct(_ sender: Any) {
        let imagePicker = UIImagePickerController()
        // Assigning `.camera` on a device without a camera raises an exception,
        // so fall back to the photo library there.
        imagePicker.sourceType = UIImagePickerController.isSourceTypeAvailable(.camera) ? .camera : .photoLibrary
        imagePicker.delegate = self
        present(imagePicker, animated: true, completion: nil)
    }

    /// Clears the runner.
    @IBAction func clearAct(_ sender: Any) {
        runner.clear()
    }

    /// Runs a prediction on `toPredictTexture` and shows the elapsed time and
    /// the formatted result on the main queue.
    @IBAction func predictAct(_ sender: Any) {
        guard let texture = toPredictTexture else {
            print(" toPredictTexture is nil ")
            return
        }
        let beginDate = Date.init()
        runner.predict(texture: texture) { [weak self] (success, resultHolder) in
            guard success, let inResultHolder = resultHolder else {
                print(" predict fail ")
                return
            }
            let timeUse = Date.init().timeIntervalSince(beginDate)
            DispatchQueue.main.async {
                guard let strongSelf = self else { return }
                strongSelf.elapsedTimeLabel.text = "\(timeUse * 1000)ms"
                strongSelf.resultTextView.text = strongSelf.net.resultStr(res: inResultHolder)
            }
        }
    }
}
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
    /// Dismisses the picker, shows the picked original image and requests a
    /// texture for it.
    ///
    /// NOTE(review): this is the pre-Swift-4.2 signature with `String` keys.
    /// If the target builds with the Swift 4.2 SDK names, the delegate method
    /// takes `[UIImagePickerController.InfoKey: Any]` and this one would not
    /// be called -- confirm against the project's Swift version.
    func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
        picker.dismiss(animated: true) { [weak self] in
            // A deallocated controller is not an error: there is nothing left to update.
            guard let sSelf = self else {
                return
            }
            guard let image = info["UIImagePickerControllerOriginalImage"] as? UIImage,
                  let cgImage = image.cgImage else {
                fatalError("no image")
            }
            sSelf.selectImageView.image = image
            sSelf.runner.getTexture(image: cgImage, getTexture: { [weak sSelf] (texture) in
                sSelf?.toPredictTexture = texture
            })
        }
    }
}
......@@ -16,36 +16,36 @@ import UIKit
@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {

    /// Window owned by the application delegate.
    var window: UIWindow?

    /// Launch hook: no custom setup is done and success is always reported.
    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
        return true
    }

    /// The app is about to leave the active state (interruption such as a call
    /// or SMS, or the user quitting). Pause tasks, timers and rendering here.
    func applicationWillResignActive(_ application: UIApplication) {
    }

    /// The app moved to the background. Release shared resources and persist
    /// enough state to restore the app if it is terminated later; this is
    /// called instead of `applicationWillTerminate(_:)` when background
    /// execution is supported.
    func applicationDidEnterBackground(_ application: UIApplication) {
    }

    /// The app is returning from the background; undo the changes made when
    /// it entered the background.
    func applicationWillEnterForeground(_ application: UIApplication) {
    }

    /// The app became active again. Restart paused (or not yet started) tasks
    /// and optionally refresh the user interface.
    func applicationDidBecomeActive(_ application: UIApplication) {
    }

    /// The app is about to terminate. Save data if appropriate; see also
    /// `applicationDidEnterBackground(_:)`.
    func applicationWillTerminate(_ application: UIApplication) {
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="4MS-jc-i6A">
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/>
</device>
......
......@@ -18,14 +18,14 @@ import Foundation
import paddle_mobile
/// Process-wide holder of the Metal device, a command queue created from it
/// and a texture loader bound to the same device.
@objc public class MetalHelper: NSObject {
    /// System default Metal device.
    @objc let device: MTLDevice
    /// Command queue created from `device`.
    @objc let queue: MTLCommandQueue
    /// Texture loader created for `device`.
    @objc let textureLoader: MTKTextureLoader
    /// Shared instance; the initializer is private, so this is the only one.
    @objc static let shared: MetalHelper = MetalHelper.init()

    /// Creates the device, queue and loader.
    ///
    /// Terminates the process with an explanatory message when Metal is
    /// unavailable, instead of trapping on a bare force unwrap.
    private override init() {
        guard let systemDevice = MTLCreateSystemDefaultDevice() else {
            fatalError("MetalHelper: no system default Metal device available")
        }
        guard let commandQueue = systemDevice.makeCommandQueue() else {
            fatalError("MetalHelper: failed to create a Metal command queue")
        }
        device = systemDevice
        queue = commandQueue
        textureLoader = MTKTextureLoader.init(device: systemDevice)
        super.init()
    }
}
......@@ -16,51 +16,51 @@ import UIKit
import paddle_mobile
/// Test screen that owns two runners (`MobileNet_ssd_hand` and `Genet`).
class MultiPredictViewController: UIViewController {
    var runner1: Runner!
    var runner2: Runner!

    /// Builds both networks and their runners.
    ///
    /// NOTE(review): both runners share `MetalHelper.shared.queue`. The
    /// previous code also created a second command queue that was never used;
    /// confirm whether `runner2` was meant to run on its own queue.
    override func viewDidLoad() {
        super.viewDidLoad()
        let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device)
        let genet = Genet.init(device: MetalHelper.shared.device)
        runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue)
        runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue)
    }

    /// Loads the second runner and reports a failed load.
    ///
    /// Only `load()` was ever live here; the disabled experiments that used to
    /// follow (repeated `predict(cgImage:)` loops on background queues) were
    /// entirely commented out and have been removed.
    @IBAction func predictAct(_ sender: Any) {
        if !runner2.load() {
            print("load failed")
        }
    }
}
......@@ -20,30 +20,30 @@
#import <algorithm>
// Inputs and outputs of MultiClassNMSCompute.
//
// Default member initializers give a freshly declared NMSParam well-defined
// null pointers and zero sizes instead of indeterminate values.
struct NMSParam {
    // Score buffer handed to the NMS routines (not owned by this struct).
    float *score_data = nullptr;

    // Box buffer handed to the NMS routines (not owned by this struct).
    float *box_data = nullptr;

    // Result buffer; MultiClassNMSCompute assigns a `new float[]` array here.
    float *output = nullptr;

    // Number of floats in `output`.
    int output_size = 0;

    // Dimensions of `score_data`; MultiClassNMSCompute asserts 3 entries with
    // a leading 1.
    std::vector<int> score_dim;

    // Dimensions of `box_data`; MultiClassNMSCompute asserts 3 entries with
    // a leading 1.
    std::vector<int> box_dim;

    // Scores must be strictly greater than this to be considered.
    float scoreThredshold = 0.0f;

    // Per-class candidate limit applied before suppression; -1 disables it.
    int nmsTopK = 0;

    // Limit on detections kept over all classes; -1 disables it.
    int keepTopK = 0;

    // Factor applied to the adaptive overlap threshold.
    float nmsEta = 0.0f;

    // Overlap above which a candidate is suppressed.
    float nmsThreshold = 0.0f;

    // Class index that is skipped during suppression.
    int background_label = 0;
};
......@@ -53,63 +53,63 @@ constexpr int kBBoxSize = 4;
// Comparator for descending score order: true when pair1's score (`first`)
// is strictly greater than pair2's. Equal scores compare false both ways, so
// a stable sort keeps their original relative order.
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
                          const std::pair<float, T>& pair2) {
    return pair1.first > pair2.first;
}
// Appends (score, index) pairs for every score strictly greater than
// `threshold` to `*sorted_indices`, stably sorts the whole vector by
// descending score and truncates it to `top_k` entries.
//
// scores:         candidate scores; the index stored is the position here.
// threshold:      exclusive lower bound for a score to be kept.
// top_k:          maximum number of entries to keep; values below 0 disable
//                 the truncation.
// sorted_indices: output vector; existing entries are kept and take part in
//                 the sort.
template <class T>
static inline void GetMaxScoreIndex(
    const std::vector<T>& scores, const T threshold, int top_k,
    std::vector<std::pair<T, int>>* sorted_indices) {
    for (size_t i = 0; i < scores.size(); ++i) {
        if (scores[i] > threshold) {
            sorted_indices->push_back(
                std::make_pair(scores[i], static_cast<int>(i)));
        }
    }
    // Sort by descending score. The comparator is written against T, so the
    // template is no longer restricted to float scores.
    std::stable_sort(
        sorted_indices->begin(), sorted_indices->end(),
        [](const std::pair<T, int>& lhs, const std::pair<T, int>& rhs) {
            return lhs.first > rhs.first;
        });
    // Keep top_k scores if needed.
    if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
        sorted_indices->resize(top_k);
    }
}
// Area of a box stored as [xmin, ymin, xmax, ymax].
//
// Returns 0 when the coordinates are invalid (xmax < xmin or ymax < ymin).
// With `normalized` set the area is w * h; otherwise the coordinates are not
// within [0, 1] and the area is (w + 1) * (h + 1).
template <class T>
static inline T BBoxArea(const T* box, const bool normalized) {
    if (box[2] < box[0] || box[3] < box[1]) {
        // Invalid coordinates.
        return static_cast<T>(0.);
    }
    const T w = box[2] - box[0];
    const T h = box[3] - box[1];
    if (normalized) {
        return w * h;
    }
    // Coordinate values are not within range [0, 1].
    return (w + 1) * (h + 1);
}
// Jaccard overlap (intersection over union) of two boxes stored as
// [xmin, ymin, xmax, ymax].
//
// Returns 0 when the boxes do not intersect. `normalized` is forwarded to
// BBoxArea and also selects the intersection convention: for non-normalized
// boxes the intersection width and height get the same +1 that BBoxArea adds,
// so intersection and union are measured consistently. Results for
// normalized boxes are unchanged.
template <class T>
static inline T JaccardOverlap(const T* box1, const T* box2,
                               const bool normalized) {
    if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
        box2[3] < box1[1]) {
        return static_cast<T>(0.);
    }
    const T inter_xmin = std::max(box1[0], box2[0]);
    const T inter_ymin = std::max(box1[1], box2[1]);
    const T inter_xmax = std::min(box1[2], box2[2]);
    const T inter_ymax = std::min(box1[3], box2[3]);
    // Extra extent used for non-normalized coordinates, matching BBoxArea.
    const T norm = normalized ? static_cast<T>(0.) : static_cast<T>(1.);
    const T inter_w = inter_xmax - inter_xmin + norm;
    const T inter_h = inter_ymax - inter_ymin + norm;
    const T inter_area = inter_w * inter_h;
    const T bbox1_area = BBoxArea<T>(box1, normalized);
    const T bbox2_area = BBoxArea<T>(box2, normalized);
    return inter_area / (bbox1_area + bbox2_area - inter_area);
}
template <typename T>
......@@ -120,40 +120,40 @@ static inline void NMSFast(
const T score_threshold, const T nms_threshold,
const T eta, const int top_k,
std::vector<int>* selected_indices) {
// The total boxes for each instance.
int num_boxes = bbox_dim[0];
// 4: [xmin ymin xmax ymax]
int box_size = bbox_dim[1];
std::vector<T> scores_data(num_boxes);
std::copy_n(score_data, num_boxes, scores_data.begin());
std::vector<std::pair<T, int>> sorted_indices;
GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);
selected_indices->clear();
T adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (size_t k = 0; k < selected_indices->size(); ++k) {
if (keep) {
const int kept_idx = (*selected_indices)[k];
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, true);
keep = overlap <= adaptive_threshold;
} else {
break;
}
}
if (keep) {
selected_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= eta;
// The total boxes for each instance.
int num_boxes = bbox_dim[0];
// 4: [xmin ymin xmax ymax]
int box_size = bbox_dim[1];
std::vector<T> scores_data(num_boxes);
std::copy_n(score_data, num_boxes, scores_data.begin());
std::vector<std::pair<T, int>> sorted_indices;
GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);
selected_indices->clear();
T adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (size_t k = 0; k < selected_indices->size(); ++k) {
if (keep) {
const int kept_idx = (*selected_indices)[k];
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, true);
keep = overlap <= adaptive_threshold;
} else {
break;
}
}
if (keep) {
selected_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= eta;
}
}
}
}
template <typename T>
......@@ -165,48 +165,48 @@ void MultiClassNMS(const T *boxes_data,
const int& background_label, const int& nms_top_k,
const int& keep_top_k, const T& nms_threshold,
const T& nms_eta, const T& score_threshold) {
int64_t class_num = score_dim[0];
int64_t predict_dim = score_dim[1];
int num_det = 0;
for (int c = 0; c < class_num; ++c) {
if (c == background_label) continue;
const T *score_data = scores_data + c * predict_dim;
/// [c] is key
NMSFast<T>(boxes_data, box_dim, score_data, score_threshold, nms_threshold, nms_eta,
int64_t class_num = score_dim[0];
int64_t predict_dim = score_dim[1];
int num_det = 0;
for (int c = 0; c < class_num; ++c) {
if (c == background_label) continue;
const T *score_data = scores_data + c * predict_dim;
/// [c] is key
NMSFast<T>(boxes_data, box_dim, score_data, score_threshold, nms_threshold, nms_eta,
nms_top_k, &((*indices)[c]));
num_det += (*indices)[c].size();
}
*num_nmsed_out = num_det;
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<T, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *indices) {
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
// PADDLE_ENFORCE_LT(idx, predict_dim);
score_index_pairs.push_back(std::make_pair(sdata[idx], std::make_pair(label, idx)));
}
num_det += (*indices)[c].size();
}
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
*num_nmsed_out = num_det;
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<T, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *indices) {
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
// PADDLE_ENFORCE_LT(idx, predict_dim);
score_index_pairs.push_back(std::make_pair(sdata[idx], std::make_pair(label, idx)));
}
}
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*indices);
*num_nmsed_out = keep_top_k;
}
new_indices.swap(*indices);
*num_nmsed_out = keep_top_k;
}
}
template <typename T>
......@@ -215,69 +215,69 @@ void MultiClassOutput(const T *scores_data,
const T *bboxes_data,
T *outputs_data,
const std::map<int, std::vector<int>>& selected_indices) {
int predict_dim = score_dim[1];
int count = 0;
for (const auto& it : selected_indices) {
/// one batch
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& indices = it.second;
for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j];
const T* bdata = bboxes_data + idx * kBBoxSize;
outputs_data[count * kOutputDim] = label; // label
outputs_data[count * kOutputDim + 1] = sdata[idx]; // score
// xmin, ymin, xmax, ymax
std::memcpy(outputs_data + count * kOutputDim + 2, bdata, 4 * sizeof(T));
count++;
int predict_dim = score_dim[1];
int count = 0;
for (const auto& it : selected_indices) {
/// one batch
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& indices = it.second;
for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j];
const T* bdata = bboxes_data + idx * kBBoxSize;
outputs_data[count * kOutputDim] = label; // label
outputs_data[count * kOutputDim + 1] = sdata[idx]; // score
// xmin, ymin, xmax, ymax
std::memcpy(outputs_data + count * kOutputDim + 2, bdata, 4 * sizeof(T));
count++;
}
}
}
}
// Runs multi-class NMS for a single batch and stores the result in `param`.
//
// Preconditions (asserted): score_dim and box_dim each have 3 entries and a
// leading batch entry of 1.
//
// On return `param->output` points to an array allocated here with `new[]`
// and `param->output_size` holds its length in floats. This function never
// frees it, so whoever consumes the result is responsible for `delete[]`.
// When nothing survives NMS the output is the single value -1.
void MultiClassNMSCompute(NMSParam *param) {
    // Check the ranks first so the [0] reads below cannot touch an empty vector.
    assert(param->score_dim.size() == 3);
    assert(param->box_dim.size() == 3);
    assert(param->score_dim[0] == 1);
    assert(param->box_dim[0] == 1);

    auto background_label = param->background_label;
    auto nms_top_k = param->nmsTopK;
    auto keep_top_k = param->keepTopK;
    auto nms_threshold = param->nmsThreshold;
    auto nms_eta = param->nmsEta;
    auto score_threshold = param->scoreThredshold;

    // Drop the leading batch entry: the helpers work on one batch.
    std::vector<int> score_dim_one_batch = {param->score_dim[1], param->score_dim[2]};
    std::vector<int> box_dim_one_batch = {param->box_dim[1], param->box_dim[2]};

    std::map<int, std::vector<int>> indices;
    int num_nmsed_out = 0;
    MultiClassNMS<float>(param->box_data, box_dim_one_batch, param->score_data, score_dim_one_batch, &indices, &num_nmsed_out,
                         background_label, nms_top_k, keep_top_k, nms_threshold,
                         nms_eta, score_threshold);

    // Single batch, so the number of kept detections is exactly what NMS reported.
    const int num_kept = num_nmsed_out;

    float *outputs;
    int output_size = 0;
    if (num_kept == 0) {
        // Sentinel result for "no detections".
        outputs = new float[1];
        outputs[0] = -1;
        output_size = 1;
    } else {
        output_size = num_kept * kOutputDim;
        outputs = new float[output_size];
        MultiClassOutput<float>(param->score_data, score_dim_one_batch, param->box_data, outputs, indices);
    }

    param->output = outputs;
    param->output_size = output_size;
}
@implementation CPUResult
......@@ -286,31 +286,31 @@ void MultiClassNMSCompute(NMSParam *param) {
@implementation NMSCompute

/// Runs CPU multi-class NMS over one batch of scores and boxes.
///
/// Copies the receiver's NMS settings and dimension lists into an `NMSParam`,
/// calls `MultiClassNMSCompute`, and wraps the produced buffer in a `CPUResult`.
///
/// @param score Pointer to the score data; passed through untouched.
/// @param bbox  Pointer to the box data; passed through untouched.
/// @return A `CPUResult` whose `output` / `outputSize` are taken from the
///         `NMSParam` after the computation.
///
/// NOTE(review): `MultiClassNMSCompute` appears to assert that both dimension
/// lists have three entries with a leading 1, and to allocate `output` with
/// `new float[]`; nothing here releases that buffer — confirm who owns it.
- (CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox {
    NMSParam param;
    param.box_data = bbox;
    param.score_data = score;
    param.background_label = self.background_label;
    param.scoreThredshold = self.scoreThredshold;
    param.nmsTopK = self.nmsTopK;
    param.keepTopK = self.keepTopK;
    param.nmsEta = self.nmsEta;
    param.nmsThreshold = self.nmsThreshold;

    // Convert the boxed NSNumber dimensions into plain int vectors.
    std::vector<int> score_dim;
    score_dim.reserve(self.scoreDim.count);
    for (NSNumber *extent in self.scoreDim) {
        score_dim.push_back(extent.intValue);
    }
    param.score_dim = score_dim;

    std::vector<int> box_dim;
    box_dim.reserve(self.bboxDim.count);
    for (NSNumber *extent in self.bboxDim) {
        box_dim.push_back(extent.intValue);
    }
    param.box_dim = box_dim;

    MultiClassNMSCompute(&param);

    CPUResult *cr = [[CPUResult alloc] init];
    cr.output = param.output;
    cr.outputSize = param.output_size;
    return cr;
}

@end
......
......@@ -16,37 +16,37 @@ import Foundation
import paddle_mobile
/// `Net` configuration for the "genet" model: a 1x128x128x3 input that is
/// preprocessed by the `genet_preprocess` Metal function.
public class Genet: Net {
    /// Configures the net from the `genet_model` / `genet_params` resources of the main bundle.
    ///
    /// `?!` is a project-defined operator; presumably it traps with the given
    /// message when the resource path is nil — confirm.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        modelPath = Bundle.main.path(forResource: "genet_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! "para null"
        preprocessKernel = GenetPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 128, 128, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Configures the net from in-memory model and parameter buffers instead of bundle files.
    /// The pointers and sizes are forwarded unchanged to the superclass initializer.
    @objc override public init(device: MTLDevice,
                               inParamPointer: UnsafeMutableRawPointer,
                               inParamSize: Int,
                               inModelPointer: UnsafeMutableRawPointer,
                               inModelSize: Int) {
        super.init(device: device,
                   inParamPointer: inParamPointer,
                   inParamSize: inParamSize,
                   inModelPointer: inModelPointer,
                   inModelSize: inModelSize)
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        preprocessKernel = GenetPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 128, 128, 3])
    }

    /// Preprocess kernel bound to the `genet_preprocess` function of the default Metal library,
    /// producing a 128x128, 3-channel output.
    class GenetPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let s = Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "genet_preprocess",
                       outputDim: s,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Returns a short preview containing only the first value of the first result.
    /// Traps if `res` is empty (unchecked `res[0]`).
    override public func resultStr(res: [ResultHolder]) -> String {
        return " \(res[0].result[0]) ... "
    }
}
......@@ -16,53 +16,53 @@ import Foundation
import paddle_mobile
/// `Net` configuration for the MobileNet classifier: a 1x224x224x3 input,
/// the `mobilenet_preprocess` Metal function and a `synset` label table used
/// to print the top five predictions.
public class MobileNet: Net {
    /// Preprocess kernel bound to the `mobilenet_preprocess` function of the default Metal library,
    /// producing a 224x224, 3-channel output.
    class MobilenetPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "mobilenet_preprocess",
                       outputDim: s,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Label table loaded from a bundled text file.
    ///
    /// Every line longer than ten characters contributes one entry consisting of
    /// the text after its first ten characters; shorter lines are dropped.
    class PreWords {
        var contents: [String] = []

        /// - Parameters:
        ///   - fileName: Resource name without extension.
        ///   - type: Resource extension, `"txt"` by default.
        ///   - inBundle: Bundle to search, the main bundle by default.
        /// Traps when the resource is missing or cannot be read.
        init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
            guard let filePath = inBundle.path(forResource: fileName, ofType: type) else {
                fatalError("no file call \(fileName)")
            }
            let string: String
            do {
                string = try String.init(contentsOfFile: filePath)
            } catch {
                // The original used `try!`; keep the trap but say what failed.
                fatalError("can't read \(filePath): \(error)")
            }
            contents = string
                .components(separatedBy: CharacterSet.newlines)
                .filter { $0.count > 10 }
                .map { String($0[$0.index($0.startIndex, offsetBy: 10)...]) }
        }

        /// Returns the label at `index`; traps when out of range.
        subscript(index: Int) -> String {
            return contents[index]
        }
    }

    let labels = PreWords.init(fileName: "synset")

    /// Formats the five highest-scoring entries of the first result as
    /// `"<rank>: <label> (<score * 100>%)"`, one per line.
    ///
    /// `top(r:)` is a project extension; it is used here as yielding
    /// (index, value) pairs — confirm.
    override public func resultStr(res: [ResultHolder]) -> String {
        let resPointer = res[0].result
        return (0..<res[0].capacity)
            .map { resPointer[$0] }
            .top(r: 5)
            .enumerated()
            .map { String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100) }
            .joined(separator: "\n")
    }

    /// Configures the net from the `mobilenet_model` / `mobilenet_params` resources of the main bundle.
    override public init(device: MTLDevice) {
        super.init(device: device)
        except = 0
        modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
        // metalLoadMode = .LoadMetalInCustomMetalLib
        // metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
        preprocessKernel = MobilenetPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 224, 224, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }
}
......@@ -16,18 +16,18 @@ import Foundation
import paddle_mobile
/// `Net` configuration for the combined MobileNet model with a 1x224x224x3 input.
/// No preprocess kernel is assigned here.
public class MobileNetCombined: Net {
    /// Configures the net from the `combined_mobilenet_model` /
    /// `combined_mobilenet_params` resources of the main bundle.
    ///
    /// `?!` is a project-defined operator; presumably it traps with the given
    /// message when the resource path is nil — confirm.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        except = 0
        modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
        inputDim = Dim.init(inDim: [1, 224, 224, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Returns a short preview containing only the first value of the first result.
    /// Traps if `res` is empty (unchecked `res[0]`).
    override public func resultStr(res: [ResultHolder]) -> String {
        return " \(res[0].result[0]) ... "
    }
}
......@@ -16,84 +16,84 @@ import Foundation
import paddle_mobile
/// `Net` configuration for the SSD hand-detection model: a 1x300x300x3 input
/// preprocessed by the `mobilenet_ssd_preprocess` Metal function.
///
/// `fetchResult` is not implemented and traps; the CPU NMS path it used to run
/// is kept below as commented-out reference code.
public class MobileNet_ssd_hand: Net {
    /// Configures the net from the `ssd_hand_model` / `ssd_hand_params` resources of the main bundle.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        except = 2
        modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null"
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        preprocessKernel = MobilenetssdPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 300, 300, 3])
    }

    /// Configures the net from in-memory model and parameter buffers.
    /// `modelPath` and `paramPath` are set to empty strings on this path.
    @objc override public init(device: MTLDevice,
                               inParamPointer: UnsafeMutableRawPointer,
                               inParamSize: Int,
                               inModelPointer inModePointer: UnsafeMutableRawPointer,
                               inModelSize: Int) {
        super.init(device: device,
                   inParamPointer: inParamPointer,
                   inParamSize: inParamSize,
                   inModelPointer: inModePointer,
                   inModelSize: inModelSize)
        except = 2
        modelPath = ""
        paramPath = ""
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        preprocessKernel = MobilenetssdPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 300, 300, 3])
    }

    /// Preprocess kernel bound to the `mobilenet_ssd_preprocess` function of the default Metal library,
    /// producing a 300x300, 3-channel output.
    class MobilenetssdPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let s = Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "mobilenet_ssd_preprocess",
                       outputDim: s,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Returns the string interpolation of the first result holder.
    /// Traps if `res` is empty (unchecked `res[0]`).
    override public func resultStr(res: [ResultHolder]) -> String {
        return " \(res[0])"
    }

    /// Not implemented: always traps via `fatalError()`.
    /// The commented-out code is the previous CPU NMS post-processing, kept for reference.
    override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] {
        //    guard let interRes = paddleMobileRes.intermediateResults else {
        //      fatalError(" need have inter result ")
        //    }
        //
        //    guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else {
        //      fatalError(" need score ")
        //    }
        //
        //    guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else {
        //      fatalError()
        //    }
        //
        //    var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3]))
        ////    print("score: ")
        ////    print(scoreFormatArr.strideArray())
        ////
        //    var bboxArr = bbox.metalTexture.float32Array()
        ////    print("bbox: ")
        ////    print(bboxArr.strideArray())
        //
        //    let nmsCompute = NMSCompute.init()
        //    nmsCompute.scoreThredshold = 0.01
        //    nmsCompute.nmsTopK = 400
        //    nmsCompute.keepTopK = 200
        //    nmsCompute.nmsEta = 1.0
        //    nmsCompute.nmsThreshold = 0.45
        //    nmsCompute.background_label = 0;
        //
        //    nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])]
        //
        //    nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])]
        //    guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else {
        //      fatalError( " result error " )
        //    }
        //
        //    let output: [Float32] = result.map { $0.floatValue }
        //
        //
        //    return output
        fatalError()
    }
}
......@@ -16,137 +16,137 @@ import Foundation
import paddle_mobile
/// `Net` configuration for the SSD "AR" model: a 1x160x160x3 input
/// preprocessed by the `mobilent_ar_preprocess` Metal function.
///
/// `fetchResult` is not implemented and traps; the previous CPU NMS path and a
/// disabled `updateProgram` override are kept as commented-out reference code.
public class MobileNet_ssd_AR: Net {
    /// Configures the net from the `ar_model` / `ar_params` resources of the main bundle.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        except = 2
        modelPath = Bundle.main.path(forResource: "ar_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! "para null"
        preprocessKernel = MobilenetssdPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 160, 160, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Configures the net from in-memory model and parameter buffers.
    /// The pointers and sizes are forwarded unchanged to the superclass initializer.
    @objc override public init(device: MTLDevice,
                               inParamPointer: UnsafeMutableRawPointer,
                               inParamSize: Int,
                               inModelPointer: UnsafeMutableRawPointer,
                               inModelSize: Int) {
        super.init(device: device,
                   inParamPointer: inParamPointer,
                   inParamSize: inParamSize,
                   inModelPointer: inModelPointer,
                   inModelSize: inModelSize)
        except = 2
        preprocessKernel = MobilenetssdPreProccess.init(device: device)
        inputDim = Dim.init(inDim: [1, 160, 160, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Preprocess kernel bound to the `mobilent_ar_preprocess` function of the default Metal library,
    /// producing a 160x160, 3-channel output. The function name spelling matches the Metal source.
    class MobilenetssdPreProccess: CusomKernel {
        init(device: MTLDevice) {
            let s = Shape.init(inWidth: 160, inHeight: 160, inChannel: 3)
            super.init(device: device,
                       inFunctionName: "mobilent_ar_preprocess",
                       outputDim: s,
                       metalLoadModel: .LoadMetalInDefaultLib,
                       metalLibPath: nil)
        }
    }

    /// Returns a short preview containing only the first value of the first result.
    /// Traps if `res` is empty (unchecked `res[0]`).
    override public func resultStr(res: [ResultHolder]) -> String {
        return " \(res[0].result[0])"
    }

    /// Not implemented: always traps via `fatalError()`.
    /// The commented-out code is the previous CPU NMS post-processing, kept for reference.
    override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] {
        fatalError()
        //    guard let interRes = paddleMobileRes.intermediateResults else {
        //      fatalError(" need have inter result ")
        //    }
        //
        //    guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
        //      fatalError(" need score ")
        //    }
        //
        //    guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
        //      fatalError()
        //    }
        //    let startDate = Date.init()
        //    print("scoreFormatArr: ")
        //print((0..<score.capacity).map{ score.result[$0] }.strideArray())
        //
        //    print("bbox arr: ")
        //
        //    print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
        //    let nmsCompute = NMSCompute.init()
        //    nmsCompute.scoreThredshold = 0.25
        //    nmsCompute.nmsTopK = 100
        //    nmsCompute.keepTopK = 100
        //    nmsCompute.nmsEta = 1.0
        //    nmsCompute.nmsThreshold = 0.449999988
        //    nmsCompute.background_label = 0;
        //    nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
        //    nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
        //    guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
        //      fatalError( " result error " )
        //    }
        //    let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
        //    for i in 0..<Int(result.outputSize) {
        //
        //      print("i \(i) : \(result.output[i])")
        //    }
        //    print(Date.init().timeIntervalSince(startDate))
        //    print(resultHolder.result![0])
        //    return resultHolder
    }

    //  override func updateProgram(program: Program) {
    //    for i in [56, 66, 76, 86, 93, 99] {
    //      let opDesc = program.programDesc.blocks[0].ops[i]
    //      let output = opDesc.outputs["Out"]!.first!
    //      let v = program.scope[output]!
    //      let originTexture = v as! Texture
    //      originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
    //
    //      originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
    //
    //      originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
    //
    //      program.scope[output] = originTexture
    //
    //      if i == 99 {
    //        opDesc.attrs["axis"] = 0
    //      } else {
    //        opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
    //      }
    //    }
    //
    //    for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
    //      let opDesc = program.programDesc.blocks[0].ops[i]
    //      let output = opDesc.outputs["Out"]!.first!
    //      let v = program.scope[output]!
    //
    //
    //
    //      let originTexture = v as! Texture
    //      originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
    //      opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
    //    }
    //
    //    for i in [60, 101, 90, 97, 70, 80] {
    //      let opDesc = program.programDesc.blocks[0].ops[i]
    //      let output = opDesc.outputs["Out"]!.first!
    //      let v = program.scope[output]!
    //      let originTexture = v as! Texture
    //      originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
    //      opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
    //    }
    //
    //    for i in [102] {
    //      let opDesc = program.programDesc.blocks[0].ops[i]
    //      for output in opDesc.outputs["Out"]! {
    //        let v = program.scope[output]!
    //        let originTexture = v as! Texture
    //        originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
    //      }
    //      opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
    //      print(" split axis \(opDesc.attrs["axis"])")
    //    }
    //    // 99
    //    }
    //  }
}
......@@ -17,9 +17,9 @@ using namespace metal;
kernel void mobilenet_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
......@@ -31,9 +31,9 @@ kernel void mobilenet_preprocess(
}
kernel void mobilenet_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
......@@ -45,9 +45,9 @@ kernel void mobilenet_preprocess_half(
}
kernel void mobilenet_ssd_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
......@@ -59,9 +59,9 @@ kernel void mobilenet_ssd_preprocess(
}
kernel void mobilenet_ssd_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
......@@ -74,44 +74,44 @@ kernel void mobilenet_ssd_preprocess_half(
// Float preprocess kernel for the genet model.
// Each thread handles one pixel: the input texel is scaled by 255, has 128
// subtracted from its first three components, is multiplied by 0.017, and is
// written with its x and z components swapped and w forced to 0
// (presumably an RGB <-> BGR reorder — confirm against the model's channel order).
kernel void genet_preprocess(texture2d<float, access::read> inTexture [[texture(0)]],
                             texture2d<float, access::write> outTexture [[texture(1)]],
                             uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) {
        return;
    }
    const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f);
    const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
    outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
// Half-precision variant of the genet preprocess kernel.
// Each thread handles one pixel: the input texel is scaled by 255, has 128
// subtracted from its first three components, is multiplied by 0.017, and is
// written with its x and z components swapped and w forced to 0
// (presumably an RGB <-> BGR reorder — confirm against the model's channel order).
kernel void genet_preprocess_half(texture2d<half, access::read> inTexture [[texture(0)]],
                                  texture2d<half, access::write> outTexture [[texture(1)]],
                                  uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) {
        return;
    }
    const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f);
    const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
    outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
// Float preprocess kernel for the SSD "AR" model (the name's spelling is what
// the Swift side looks up, so it must not be corrected here alone).
// Each thread handles one pixel: the input texel is scaled by 255, has 128
// subtracted from its first three components, is multiplied by 0.017, and is
// written with its x and z components swapped and w forced to 0
// (presumably an RGB <-> BGR reorder — confirm against the model's channel order).
kernel void mobilent_ar_preprocess(texture2d<float, access::read> inTexture [[texture(0)]],
                                   texture2d<float, access::write> outTexture [[texture(1)]],
                                   uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) {
        return;
    }
    const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f);
    const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
    outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
// Half-precision variant of the SSD "AR" preprocess kernel.
// Each thread handles one pixel: the input texel is scaled by 255, has 128
// subtracted from its first three components, is multiplied by 0.017, and is
// written with its x and z components swapped and w forced to 0
// (presumably an RGB <-> BGR reorder — confirm against the model's channel order).
kernel void mobilent_ar_preprocess_half(texture2d<half, access::read> inTexture [[texture(0)]],
                                        texture2d<half, access::write> outTexture [[texture(1)]],
                                        uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the output texture have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) {
        return;
    }
    const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f);
    const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
    outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
......@@ -17,18 +17,18 @@ import Foundation
import paddle_mobile
/// `Net` configuration for the YOLO model with a 1x416x416x3 input.
/// No preprocess kernel is assigned here.
public class YoloNet: Net {
    /// Configures the net from the `yolo_model` / `yolo_params` resources of the main bundle.
    ///
    /// `?!` is a project-defined operator; presumably it traps with the given
    /// message when the resource path is nil — confirm.
    @objc public override init(device: MTLDevice) {
        super.init(device: device)
        except = 0
        modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null"
        inputDim = Dim.init(inDim: [1, 416, 416, 3])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Returns a short preview containing only the first value of the first result.
    /// Traps if `res` is empty (unchecked `res[0]`).
    override public func resultStr(res: [ResultHolder]) -> String {
        return " \(res[0].result[0]) ... "
    }
}
......@@ -34,83 +34,83 @@
/// Reads the whole file at `path` into a newly malloc'ed buffer.
/// On success returns the buffer and stores its byte length in `*outSize`.
/// Returns NULL (with `*outSize` set to 0) when the path is nil, the file cannot
/// be opened, is empty, cannot be sized, or cannot be read completely.
static void *PMReadFileContents(NSString *path, long *outSize) {
    *outSize = 0;
    if (path == nil) {
        return NULL;
    }
    FILE *fp = fopen([path UTF8String], "rb");
    if (fp == NULL) {
        return NULL;
    }
    void *buffer = NULL;
    if (fseek(fp, 0, SEEK_END) == 0) {
        long size = ftell(fp);
        rewind(fp);
        if (size > 0) {
            buffer = malloc((size_t)size);
            // A short read would hand the loader a partially filled buffer, so treat it as failure.
            if (buffer != NULL && fread(buffer, 1, (size_t)size, fp) != (size_t)size) {
                free(buffer);
                buffer = NULL;
            }
            if (buffer != NULL) {
                *outSize = size;
            }
        }
    }
    fclose(fp);
    return buffer;
}

@implementation LoadPointerViewController

/// Shows the sample image and loads the bundled `super_model` / `super_params`
/// files into memory, storing the raw buffers and their sizes in `_modelConfig`.
/// `_modelConfig` stays nil when either file cannot be read.
- (void)viewDidLoad {
    [super viewDidLoad];

    self.imageView.image = [UIImage imageNamed:@"banana.jpeg"];

    NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"super_model" withExtension:nil].path;
    NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"super_params" withExtension:nil].path;

    long modelFileSize = 0;
    long paramFileSize = 0;
    void *modelBuffer = PMReadFileContents(modelPath, &modelFileSize);
    void *paramBuffer = PMReadFileContents(paramPath, &paramFileSize);
    if (modelBuffer == NULL || paramBuffer == NULL) {
        NSLog(@" failed to read model or param file");
        free(modelBuffer);
        free(paramBuffer);
        return;
    }

    // NOTE(review): the buffers are never freed here; ownership after they are handed
    // to ModelConfig is not visible from this file — confirm who releases them.
    _modelConfig = [[ModelConfig alloc] init];
    _modelConfig.modelPointer = modelBuffer;
    _modelConfig.modelSize = (int)modelFileSize;
    _modelConfig.paramPointer = paramBuffer;
    _modelConfig.paramSize = (int)paramFileSize;
}

/// Creates the super-resolution PaddleMobileGPU instance from `_modelConfig` and loads it.
- (IBAction)loaderButtonPressed:(id)sender {
    if (!_modelConfig) {
        NSLog(@" model config is nil");
        return;
    }
    self.paddleMobile = [[PaddleMobileGPU alloc] initWithCommandQueue:MetalHelper.shared.queue net:SuperResolutionNetType modelConfig:_modelConfig];
    _loaded = [self.paddleMobile load];
    NSLog(@" load 结果: %@", _loaded ? @"成功" : @"失败");
}

/// Runs one prediction on the currently displayed image.
- (IBAction)predictButtonPressed:(id)sender {
    [self predict];
}

/// Converts the displayed image to a Metal texture, runs `max` predictions on it,
/// and logs the average elapsed time in milliseconds once the last one succeeds.
- (void)predict {
    UIImage *image = self.imageView.image;
    if (!image) {
        NSLog(@" image is nil");
        return;
    }
    id<MTLTexture> texture = [MetalHelper.shared.textureLoader newTextureWithCGImage:image.CGImage options:nil error:nil];
    _texture = texture;
    if (!_texture) {
        NSLog(@" texture is nil");
        return;
    }
    if (!self.loaded) {
        NSLog(@" not load ");
        return;
    }
    NSTimeInterval startTime = [[NSDate date] timeIntervalSince1970];
    NSInteger max = 1;
    for (NSInteger i = 0; i < max; i++) {
        [self.paddleMobile predict:_texture withCompletion:^(BOOL success, NSArray<NSArray<NSNumber *> *> *result) {
            // Only the final iteration reports timing, averaged over all iterations.
            if (success && i == max - 1) {
                double time = [[NSDate date] timeIntervalSince1970] - startTime;
                time = (time/max)*1000;
                NSLog(@"gap ==== %fms",time);
            }
        }];
    }
}

/// Clears the PaddleMobileGPU instance and marks the model as not loaded.
- (IBAction)clear:(id)sender {
    [self.paddleMobile clear];
    self.loaded = NO;
}

@end
......@@ -16,8 +16,8 @@
#import <Foundation/Foundation.h>
/// Selects which network a PaddleMobileGPU instance is created with.
typedef enum : NSUInteger {
    SuperResolutionNetType,
    MobileNetSSDType
} NetType;
@interface PaddleMobileGPUResult: NSObject
......
......@@ -30,75 +30,75 @@
@implementation PaddleMobileGPUResult

/// Keeps a reference to `resultHolder` and mirrors its `result` pointer and
/// `capacity` into this object's `output` / `outputSize`.
- (void)setOutputResult:(ResultHolder *)resultHolder {
    self.resultHolder = resultHolder;
    self.output = resultHolder.result;
    self.outputSize = resultHolder.capacity;
}

/// Asks the stored result holder to release its pointer.
/// `output` is that same pointer, so it should not be read after this call.
-(void)releaseOutput {
    [self.resultHolder releasePointer];
}

@end
@interface PaddleMobileGPU ()
{
    // Runner that owns the network; created in the initializer.
    Runner *runner;
}
@end
@implementation PaddleMobileGPU

/// Builds the network selected by `netType` from the in-memory model and parameter
/// buffers in `config`, and wraps it in a Runner bound to `queue`.
/// Returns nil when `netType` is not one of the known values.
-(instancetype)initWithCommandQueue:(id<MTLCommandQueue>)queue net:(NetType)netType modelConfig:(ModelConfig *)config {
    self = [super init];
    if (self) {
        Net *net = nil;
        if (netType == SuperResolutionNetType) {
            net = [[SuperResolutionNet alloc] initWithDevice:queue.device inParamPointer:config.paramPointer inParamSize:config.paramSize inModelPointer:config.modelPointer inModelSize:config.modelSize];
        } else if (netType == MobileNetSSDType) {
            net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device inParamPointer:config.paramPointer inParamSize:config.paramSize inModelPointer:config.modelPointer inModelSize:config.modelSize];
        }
        // Never hand a nil net to the runner.
        if (net == nil) {
            return nil;
        }
        runner = [[Runner alloc] initInNet:net commandQueue:queue];
    }
    return self;
}

/// Loads the network through the runner. Returns the runner's load result.
-(BOOL)load {
    return [runner load];
}

/// Runs a prediction on `texture` and passes the results to `completion` as one
/// NSNumber array per result holder. Each holder's pointer is released once its
/// values have been copied.
-(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSArray <NSNumber *>*> *))completion {
    [runner predictWithTexture:texture completion:^(BOOL success, NSArray<ResultHolder *> * _Nullable resultArr) {
        NSMutableArray<NSMutableArray <NSNumber *>*> *ocResultArray = [NSMutableArray arrayWithCapacity:resultArr.count];
        for (NSUInteger i = 0; i < resultArr.count; ++i) {
            ResultHolder *resultHolder = resultArr[i];
            NSMutableArray <NSNumber *>*res = [NSMutableArray arrayWithCapacity:resultHolder.capacity];
            for (NSInteger j = 0; j < resultHolder.capacity; ++j) {
                // Index with the inner counter j: indexing with i would copy the same element
                // `capacity` times and read past the buffer once i reached the holder's capacity.
                [res addObject:[NSNumber numberWithFloat:resultHolder.result[j]]];
            }
            [ocResultArray addObject:res];
            [resultHolder releasePointer];
        }
        completion(success, ocResultArray);
    }];
}

/// Runs a prediction on `texture` and passes the results to `completion` as
/// PaddleMobileGPUResult objects that wrap the result holders without copying.
/// Nothing is released here; callers release via `-releaseOutput`.
-(void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, NSArray <PaddleMobileGPUResult *> *))completion {
    [runner predictWithTexture:texture completion:^(BOOL success, NSArray<ResultHolder *> * _Nullable resultArr) {
        NSMutableArray <PaddleMobileGPUResult *> *ocResultArr = [NSMutableArray arrayWithCapacity:resultArr.count];
        for (NSUInteger i = 0; i < resultArr.count; ++i) {
            ResultHolder *result = resultArr[i];
            PaddleMobileGPUResult *gpuResult = [[PaddleMobileGPUResult alloc] init];
            gpuResult.dim = result.dim;
            [gpuResult setOutputResult:result];
            [ocResultArr addObject:gpuResult];
        }
        completion(success, ocResultArr);
    }];
}

/// Forwards to the runner's `clear`.
-(void)clear {
    [runner clear];
}

@end
......@@ -16,57 +16,57 @@ import Foundation
import paddle_mobile
/// `Net` configuration for the super-resolution model. It can be built either from
/// in-memory model/parameter buffers or from the files in the app bundle.
@objc public class SuperResolutionNet: Net {

    /// Result formatting is not implemented for this net; returns a fixed placeholder string.
    override public func resultStr(res: [ResultHolder]) -> String {
        return "未实现"
    }

    /// Configures the net from model and parameter data already loaded into memory.
    ///
    /// - Parameters:
    ///   - device: Metal device forwarded to `Net.init(device:)`.
    ///   - inParamPointer: Start of the parameter data.
    ///   - inParamSize: Size of the parameter data.
    ///   - inModelPointer: Start of the model data.
    ///   - inModelSize: Size of the model data.
    public override init(device: MTLDevice, inParamPointer: UnsafeMutableRawPointer, inParamSize: Int, inModelPointer: UnsafeMutableRawPointer, inModelSize: Int) {
        super.init(device: device)
        except = 0
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
        inputDim = Dim.init(inDim: [1, 224, 224, 3])
        self.paramPointer = inParamPointer
        self.paramSize = inParamSize
        self.modelPointer = inModelPointer
        self.modelSize = inModelSize
    }

    /// Configures the net from the bundled `super_model` / `super_params` files.
    /// This path uses no preprocess kernel and a single-channel 224x224 input.
    ///
    /// - Parameter device: Metal device forwarded to `Net.init(device:)`.
    @objc override public init(device: MTLDevice) {
        super.init(device: device)
        except = 0
        // `?!` is a project-defined operator; the right-hand string is its failure message.
        modelPath = Bundle.main.path(forResource: "super_model", ofType: nil) ?! "model null"
        paramPath = Bundle.main.path(forResource: "super_params", ofType: nil) ?! "para null"
        preprocessKernel = nil
        inputDim = Dim.init(inDim: [1, 224, 224, 1])
        metalLoadMode = .LoadMetalInCustomMetalLib
        metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
    }

    /// Resizes every non-persistable LodTensor texture in `program` to the current
    /// `inputDim` height and width, re-creates its Metal texture, and resizes the
    /// fetch output buffer to the same dimensions.
    ///
    /// - Parameter program: The loaded program whose scope variables are updated in place.
    override public func updateProgram(program: Program) {
        // n h w c
        for block in program.programDesc.blocks {
            for varDesc in block.vars where !varDesc.persistable && varDesc.type == .LodTensor {
                guard let texture = program.scope.vars[varDesc.name] as? Texture else {
                    continue
                }
                let newDim = Dim.init(inDim: [texture.dim[0], inputDim[1], inputDim[2], texture.tensorDim[1]])
                print(" var desc name " + varDesc.name + " new dim" + "\(newDim)")

                texture.updateDims(inTensorDim: Dim.init(inDim: [texture.tensorDim[0], texture.tensorDim[1], inputDim[1], inputDim[2]]), inDim: newDim)
                texture.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: GlobalConfig.shared.computePrecision)

                // The output is updated on every matching texture, so the dimensions of the
                // last one visited win. The force cast traps if the scope output is not a FetchHolder.
                let output: FetchHolder = program.scope.output() as! FetchHolder
                output.dim = newDim
                output.capacity = newDim.numel()
                output.paddedCapacity = newDim.numel() * 4
                output.initBuffer(device: device)
            }
        }
    }
}
......@@ -4,28 +4,28 @@ import Foundation
import QuartzCore
/// Tracks an exponentially smoothed frames-per-second estimate.
///
/// Call `start()` once, then `frameCompleted()` after every frame, and read `fps`.
public class FPSCounter {
    /// Latest smoothed frames-per-second value.
    private(set) public var fps: Double = 0

    /// Frames counted since the current measurement window began.
    var frames = 0
    /// `CACurrentMediaTime()` value at which the current window began.
    var startTime: CFTimeInterval = 0

    /// Begins a new measurement window at the current time.
    public func start() {
        frames = 0
        startTime = CACurrentMediaTime()
    }

    /// Records one finished frame. Once more than 0.1 s of the window has elapsed,
    /// `fps` is blended with the window's rate (75% old value, 25% new). After more
    /// than 1 s the window restarts.
    public func frameCompleted() {
        frames += 1
        let now = CACurrentMediaTime()
        let elapsed = now - startTime
        // Skip very short windows, where the rate estimate would be too noisy.
        guard elapsed > 0.1 else {
            return
        }
        let current = Double(frames) / elapsed
        let smoothing = 0.75
        fps = smoothing*fps + (1 - smoothing)*current
        if elapsed > 1 {
            frames = 0
            startTime = CACurrentMediaTime()
        }
    }
}
......@@ -6,15 +6,15 @@ import AVFoundation
/// Optional callbacks through which `VideoCapture` delivers video frames and photos.
@available(iOS 10.0, *)
@objc public protocol VideoCaptureDelegate: NSObjectProtocol {
    /// Delivers a captured video frame as a raw sample buffer.
    @objc optional func videoCapture(_ capture: VideoCapture, didCaptureSampleBuffer sampleBuffer: CMSampleBuffer, timestamp: CMTime)
    /// Delivers a captured video frame as a Metal texture (nil if conversion failed).
    @objc optional func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime)
    /// Delivers the preview image of a captured photo (nil if conversion failed).
    @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhoto previewImage: UIImage?)
    /// Delivers a captured photo as a Metal texture (nil if conversion failed).
    @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhotoTexture texture: MTLTexture?)
}
/**
 Simple interface to the iPhone's camera.
 */
@available(iOS 10.0, *)
public class VideoCapture: NSObject {
public var previewLayer: AVCaptureVideoPreviewLayer?
......@@ -35,9 +35,9 @@ public class VideoCapture: NSObject {
self.cameraPosition = position
super.init()
}
public func setUp(sessionPreset: AVCaptureSession.Preset = .medium,
completion: @escaping (Bool) -> Void) {
completion: @escaping (Bool) -> Void) {
queue.async {
let success = self.setUpCamera(sessionPreset: sessionPreset)
DispatchQueue.main.async {
......@@ -45,7 +45,7 @@ public class VideoCapture: NSObject {
}
}
}
func fontCamera() -> AVCaptureDevice? {
let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices
return deveices.first
......@@ -62,7 +62,7 @@ public class VideoCapture: NSObject {
captureSession.beginConfiguration()
captureSession.sessionPreset = sessionPreset
var oCaptureDevice: AVCaptureDevice?
switch cameraPosition {
case .back:
......@@ -79,56 +79,56 @@ public class VideoCapture: NSObject {
print("Error: no video devices available")
return false
}
guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else {
print("Error: could not create AVCaptureDeviceInput")
return false
}
if captureSession.canAddInput(videoInput) {
captureSession.addInput(videoInput)
}
let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect
previewLayer.connection?.videoOrientation = self.videoOrientation
self.previewLayer = previewLayer
let settings: [String : Any] = [
kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)
kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)
]
videoOutput.videoSettings = settings
videoOutput.alwaysDiscardsLateVideoFrames = true
videoOutput.setSampleBufferDelegate(self, queue: queue)
if captureSession.canAddOutput(videoOutput) {
captureSession.addOutput(videoOutput)
}
// We want the buffers to be in portrait orientation otherwise they are
// rotated by 90 degrees. Need to set this _after_ addOutput()!
videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation
if captureSession.canAddOutput(photoOutput) {
captureSession.addOutput(photoOutput)
}
captureSession.commitConfiguration()
return true
}
/// Starts the capture session unless it is already running.
public func start() {
    guard !captureSession.isRunning else {
        return
    }
    captureSession.startRunning()
}
/// Stops the capture session if it is currently running.
public func stop() {
    guard captureSession.isRunning else {
        return
    }
    captureSession.stopRunning()
}
/* Captures a single frame of the camera input. */
public func capturePhoto() {
let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)])
......@@ -139,7 +139,7 @@ public class VideoCapture: NSObject {
]
photoOutput.capturePhoto(with: settings, delegate: self)
}
func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? {
if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
let width = CVPixelBufferGetWidth(imageBuffer)
......@@ -152,7 +152,7 @@ public class VideoCapture: NSObject {
}
return nil
}
func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? {
if let sampleBuffer = sampleBuffer,
let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
......@@ -172,47 +172,47 @@ public class VideoCapture: NSObject {
// MARK: - AVCaptureVideoDataOutputSampleBufferDelegate

@available(iOS 10.0, *)
extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {
    /// Forwards a captured frame to the delegate, throttled to `fps`.
    public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // Because lowering the capture device's FPS looks ugly in the preview,
        // we capture at full speed but only call the delegate at its desired
        // framerate. If `fps` is -1, we run at the full framerate.
        let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
        let deltaTime = timestamp - lastTimestamp
        if fps == -1 || deltaTime >= CMTimeMake(1, Int32(fps)) {
            lastTimestamp = timestamp
            self.delegate?.videoCapture?(self, didCaptureSampleBuffer: sampleBuffer, timestamp: timestamp)
            // Only build a Metal texture when the delegate implements the texture callback.
            if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCaptureVideoTexture:timestamp:))) ?? false{
                let texture = convertToMTLTexture(sampleBuffer: sampleBuffer)
                delegate?.videoCapture?(self, didCaptureVideoTexture: texture, timestamp: timestamp)
            }
        }
    }

    /// Logs that the capture output dropped a frame.
    public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        print("dropped frame")
    }
}
// MARK: - AVCapturePhotoCaptureDelegate

@available(iOS 10.0, *)
extension VideoCapture: AVCapturePhotoCaptureDelegate {
    /// Converts a finished photo capture into the forms the delegate implements
    /// (Metal texture and/or preview image) and delivers them. Does nothing when
    /// `error` is non-nil.
    public func photoOutput(_ captureOutput: AVCapturePhotoOutput,
                            didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?,
                            previewPhoto previewPhotoSampleBuffer: CMSampleBuffer?,
                            resolvedSettings: AVCaptureResolvedPhotoSettings,
                            bracketSettings: AVCaptureBracketedStillImageSettings?,
                            error: Error?) {
        var imageTexture: MTLTexture?
        var previewImage: UIImage?
        if error == nil {
            // Each conversion runs only if the delegate implements the matching callback.
            if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhotoTexture:))) ?? false{
                imageTexture = convertToMTLTexture(sampleBuffer: photoSampleBuffer)
                self.delegate?.videoCapture?(self, didCapturePhotoTexture: imageTexture)
            }

            if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhoto:))) ?? false{
                previewImage = convertToUIImage(sampleBuffer: previewPhotoSampleBuffer)
                self.delegate?.videoCapture?(self, didCapturePhoto: previewImage)
            }
        }
    }
}
......@@ -19,265 +19,242 @@ import paddle_mobile
import MetalPerformanceShaders
/// Opens a binary file with the C stdio API and reads its whole contents
/// into a typed buffer. The file is closed when the reader is deallocated.
class FileReader {
    /// Open stdio handle for the file.
    let file: UnsafeMutablePointer<FILE>
    /// Size of the file in bytes, measured when the reader was created.
    let fileSize: Int

    /// Opens `paramPath` for binary reading and measures its size.
    ///
    /// - Parameter paramPath: Path of the file to read.
    /// - Throws: `PaddleMobileError.loaderError` if the file cannot be opened
    ///   or its measured size is not positive.
    init(paramPath: String) throws {
        guard let tmpFile = fopen(paramPath, "rb") else {
            throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
        }
        file = tmpFile
        fseek(file, 0, SEEK_END)
        fileSize = ftell(file)
        guard fileSize > 0 else {
            throw PaddleMobileError.loaderError(message: "param file size is too small")
        }
        rewind(file)
    }

    /// Reads `fileSize` bytes from the file's current position into a newly allocated buffer.
    ///
    /// - Returns: A buffer just large enough to hold `fileSize` bytes, rounded up to a
    ///   whole number of `T`. The caller owns it and must deallocate it.
    func read<T>() -> UnsafeMutablePointer<T> {
        // `allocate(capacity:)` counts elements of T, not bytes, so convert the byte
        // size to an element count instead of multiplying by the element size.
        let stride = MemoryLayout<T>.stride
        let elementCount = (fileSize + stride - 1) / stride
        let ptr = UnsafeMutablePointer<T>.allocate(capacity: elementCount)
        fread(ptr, fileSize, 1, file)
        return ptr
    }

    deinit {
        fclose(file)
    }
}
/// Compute back ends the demo can run a model on.
enum Platform {
    case GPU
}
let platformSupport: [(Platform, String)] = [(.GPU, "GPU")]
/// Models selectable in the demo; the raw value is the title shown in the model picker.
enum SupportModel: String{
    case yolo = "yolo"
    case mobilenet_combined = "mobilenet_combined"
    case super_resolution = "superresoltion"
    case mobilenet = "mobilenet"

    /// All selectable models in picker order; the first entry is the default selection.
    static func supportedModels() -> [SupportModel] {
        return [.super_resolution, .yolo, .mobilenet_combined, .mobilenet]
    }
}
/// One pre-built net per selectable model, all created on the shared Metal device.
/// Each key appears exactly once: a dictionary literal with duplicate keys traps at runtime.
let netSupport: [SupportModel : Net] = [
    .super_resolution : SuperResolutionNet.init(device: MetalHelper.shared.device),
    .yolo : YoloNet.init(device: MetalHelper.shared.device),
    .mobilenet_combined : MobileNetCombined.init(device: MetalHelper.shared.device),
    .mobilenet : MobileNet.init(device: MetalHelper.shared.device)]
/// Demo screen: pick a model, load it, run a prediction on the selected image,
/// and show the result and the average elapsed time.
class ViewController: UIViewController {
    @IBOutlet weak var resultTextView: UITextView!
    @IBOutlet weak var selectImageView: UIImageView!
    @IBOutlet weak var elapsedTimeLabel: UILabel!
    @IBOutlet weak var modelPickerView: UIPickerView!
    @IBOutlet weak var threadPickerView: UIPickerView!
    @IBOutlet weak var videoView: UIView!
    //  var videoCapture: VideoCapture!

    var selectImage: UIImage?
    var inputPointer: UnsafeMutablePointer<Float32>?
    var modelType: SupportModel = SupportModel.supportedModels()[0]
    var toPredictTexture: MTLTexture?

    /// Created by `loadAct`; nil until a model has been loaded.
    var runner: Runner!
    var platform: Platform = .GPU
    var threadNum = 1

    /// Creates a runner for the selected model, prepares the input texture from
    /// `selectImage` if none exists yet, and loads the model.
    @IBAction func loadAct(_ sender: Any) {
        guard let net = netSupport[modelType] else {
            fatalError(" unsupport ")
        }
        runner = Runner.init(inNet: net, commandQueue: MetalHelper.shared.queue)
        if platform == .GPU {
            //      let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil)
            //      let fileReader = try! FileReader.init(paramPath: filePath!)
            //      let pointer: UnsafeMutablePointer<Float32> = fileReader.read()
            //
            //
            //      let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared)
            //
            //      buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize)

            if self.toPredictTexture == nil {
                //        runner.getTexture(inBuffer: buffer!) { [weak self] (texture) in
                //          self?.toPredictTexture = texture
                //        }

                // The bundled test image may be missing, so do not force-unwrap it.
                if let cgImage = selectImage?.cgImage {
                    runner.getTexture(image: cgImage) { [weak self] (texture) in
                        self?.toPredictTexture = texture
                    }
                } else {
                    print("请添加测试图片")
                }
            }
        } else {
            fatalError( " unsupport " )
        }

        if runner.load() {
            print(" load success ! ")
        } else {
            print(" load error ! ")
        }
    }

    /// Presents an image picker, using the camera when it is available.
    @IBAction func selectImageAct(_ sender: Any) {
        let imagePicker = UIImagePickerController()
        // Setting an unavailable source type raises an exception (e.g. on the simulator).
        imagePicker.sourceType = UIImagePickerController.isSourceTypeAvailable(.camera) ? .camera : .photoLibrary
        imagePicker.delegate = self
        self.present(imagePicker, animated: true, completion: nil)
    }

    /// Clears the runner, if one has been created.
    @IBAction func clearAct(_ sender: Any) {
        runner?.clear()
    }

    /// Runs `max` predictions on the prepared texture and, after the last one
    /// succeeds, shows the result string and the average time per prediction.
    @IBAction func predictAct(_ sender: Any) {
        let max = 1
        switch platform {
        case .GPU:
            guard let inTexture = toPredictTexture else {
                resultTextView.text = "请选择图片 ! "
                return
            }
            guard let runner = runner else {
                resultTextView.text = "请先加载模型 ! "
                return
            }

            let startDate = Date.init()
            for i in 0..<max {
                runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
                    if success, let holders = resultHolder, let firstHolder = holders.first, i == max - 1 {
                        let time = Date.init().timeIntervalSince(startDate)

                        print(firstHolder.result.floatArr(count: firstHolder.capacity).strideArray())
                        DispatchQueue.main.async {
                            // The controller may be gone by now; skip the UI update instead of crashing.
                            guard let sSelf = self else {
                                return
                            }
                            sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: holders)
                            sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
                        }
                    }

                    // Queued after the UI update above, so the result is still valid when it is formatted.
                    DispatchQueue.main.async {
                        resultHolder?.first?.releasePointer()
                    }
                }
            }
        }
    }

    /// Sets the global compute configuration, wires up both pickers,
    /// and loads the bundled test image.
    override func viewDidLoad() {
        super.viewDidLoad()

        GlobalConfig.shared.computePrecision = .Float16
        GlobalConfig.shared.debug = false

        modelPickerView.delegate = self
        modelPickerView.dataSource = self
        threadPickerView.delegate = self
        threadPickerView.dataSource = self
        if let image = UIImage.init(named: "00001.jpg") {
            selectImage = image
            selectImageView.image = image
        } else {
            print("请添加测试图片")
        }
    }
}
// MARK: - UIPickerViewDataSource, UIPickerViewDelegate

/// Drives the model picker (rows from `SupportModel.supportedModels()`) and the
/// platform picker (rows from `platformSupport`). Any other picker view is a
/// programming error and traps.
extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
    /// Both pickers have a single component.
    func numberOfComponents(in pickerView: UIPickerView) -> Int {
        if pickerView == modelPickerView {
            return 1
        } else if pickerView == threadPickerView {
            return 1
        } else {
            fatalError()
        }
    }

    /// Number of selectable models or platforms.
    func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
        if pickerView == modelPickerView {
            return SupportModel.supportedModels().count
        } else if pickerView == threadPickerView {
            return platformSupport.count
        } else {
            fatalError()
        }
    }

    /// Row title: the model's raw value, or the platform's display name.
    public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
        if pickerView == modelPickerView {
            return SupportModel.supportedModels()[row].rawValue
        } else if pickerView == threadPickerView {
            return platformSupport[row].1
        } else {
            fatalError()
        }
    }

    /// Stores the chosen model or platform for the next load/predict.
    public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
        if pickerView == modelPickerView {
            self.modelType = SupportModel.supportedModels()[row]
        } else if pickerView == threadPickerView {
            platform = platformSupport[row].0
        } else {
            fatalError()
        }
    }
}
// MARK: - UIImagePickerControllerDelegate

/// Receives the image chosen in the system image picker, shows it in `selectImageView`, and asks
/// `runner` to turn it into a texture stored in `toPredictTexture`.
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
    // NOTE(review): the `[String : Any]` info dictionary is the pre-Swift-4.2 form of this delegate
    // method — confirm it still matches the protocol requirement for the SDK this target builds with.
    func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
        picker.dismiss(animated: true) { [weak self] in
            // The controller may already be gone when the dismissal completes; that is not a
            // programming error, so just stop instead of trapping.
            guard let sSelf = self else {
                return
            }
            guard let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else {
                fatalError("no image")
            }
            sSelf.selectImage = image
            sSelf.selectImageView.image = image

            // `cgImage` is optional on UIImage; without a CGImage there is nothing to upload.
            guard let cgImage = image.cgImage else {
                return
            }
            // Capture weakly so the texture callback does not keep the controller alive.
            sSelf.runner.getTexture(image: cgImage, getTexture: { [weak sSelf] (texture) in
                sSelf?.toPredictTexture = texture
            })
        }
    }
}
// NOTE(review): file-scope mutable flag; no read or write of it is visible in this part of the
// file — confirm it is still used before keeping it.
var bool1 = false
// MARK: - VideoCaptureDelegate

extension ViewController: VideoCaptureDelegate {
    /// Scales the incoming texture with `runner`, runs a prediction on the scaled texture, and
    /// releases the pointer held by the first result holder once the prediction completes.
    func predictTexture(texture: MTLTexture) {
        runner.scaleTexture(input: texture) { (scaledTexture) in
            self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in
                // print(resultHolder!.result![0])
                // The result itself is not consumed here; only its buffer is released.
                resultHolder?.first?.releasePointer()
            })
        }
    }
}
......
......@@ -326,9 +326,10 @@
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
IPHONEOS_DEPLOYMENT_TARGET = 12.1;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
MTL_LANGUAGE_REVISION = Metal12;
SDKROOT = iphoneos;
};
name = Debug;
......@@ -337,9 +338,10 @@
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
IPHONEOS_DEPLOYMENT_TARGET = 12.1;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
MTL_LANGUAGE_REVISION = Metal12;
SDKROOT = iphoneos;
};
name = Release;
......
......@@ -20,23 +20,23 @@ kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0
const device float4 * nscale [[buffer(0)]],
const device float4 * nbias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * nscale[gid.z] + nbias[gid.z];
outTexture.write(output, gid.xy, gid.z);
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * nscale[gid.z] + nbias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
// Half-precision per-slice affine transform: out = in * newScale[z] + newBias[z],
// where z is the array slice of the texel handled by this thread.
kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
                           texture2d_array<half, access::write> outTexture [[texture(1)]],
                           const device half4 * newScale [[buffer(0)]],
                           const device half4 * newBias [[buffer(1)]],
                           uint3 gid [[thread_position_in_grid]]) {
    // Threads that fall outside the output texture have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    const half4 input = inTexture.read(gid.xy, gid.z);
    half4 output = input * newScale[gid.z] + newBias[gid.z];
    outTexture.write(output, gid.xy, gid.z);
}
......@@ -15,10 +15,10 @@ struct MetalConvParam {
};
kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *new_scale [[buffer(0)]],
const device float4 *new_biase [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *new_scale [[buffer(0)]],
const device float4 *new_biase [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
......@@ -32,5 +32,5 @@ kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture
input = inTexture.sample(sample, gid.x, gid.y, gid.z);
output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
......@@ -21,29 +21,29 @@
#define VECTOR(p, n) CONCAT2(p, n)
// Bilinear resize of every array slice of `input` into `output`.
// When both textures have the same width and height the texel is copied unchanged; otherwise the
// source position is (gid.x * ratio_w, gid.y * ratio_h) and the four neighbouring texels are blended.
kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[texture(0)]],
                                     texture2d_array<P, access::write> output [[texture(1)]],
                                     constant bilinear_interp_param & pm [[buffer(0)]],
                                     uint3 gid [[thread_position_in_grid]]) {
    // Skip threads dispatched past the output extent, as the other kernels in this library do.
    if (gid.x >= output.get_width() ||
        gid.y >= output.get_height() ||
        gid.z >= output.get_array_size()) return;

    VECTOR(P, 4) r;
    if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
        r = input.read(gid.xy, gid.z);
    } else {
        // NOTE(review): the source coordinate is held in type P; when P is half, large
        // coordinates lose fractional precision — confirm this is acceptable.
        P w = gid.x * pm.ratio_w;
        P h = gid.y * pm.ratio_h;
        // Integer parts select the top-left neighbour; the +1 neighbours are clamped to the edge.
        uint w0 = w, h0 = h;
        uint w1 = w0 + 1, h1 = h0 + 1;
        // Fractional parts are the blend weights toward the +1 neighbours.
        P w1lambda = w - w0, h1lambda = h - h0;
        P w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
        if (w1 >= input.get_width()) w1 = w0;
        if (h1 >= input.get_height()) h1 = h0;
        VECTOR(P, 4) r0 = input.read(uint2(w0, h0), gid.z);
        VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
        VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
        VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
        r = h2lambda * (w2lambda * r0 + w1lambda * r1)
          + h1lambda * (w2lambda * r2 + w1lambda * r3);
    }
    output.write(r, gid.xy, gid.z);
}
#endif
......@@ -16,8 +16,8 @@
using namespace metal;
// Scale factors consumed by the bilinear_interp kernels.
// Field order is part of the layout of the bound buffer, so it must not change.
struct bilinear_interp_param {
    float ratio_h;  // multiplied by the output row index (gid.y) to get the source row
    float ratio_w;  // multiplied by the output column index (gid.x) to get the source column
};
#define P float
......
......@@ -20,35 +20,35 @@
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
// Decodes a box relative to a prior box.
// The prior box `p` and its variances `pv` are read from column 0, row gid.x; the four target
// values are the first components of columns 0..3 of row gid.x. The prior is converted from
// corner form to centre/size form, the target offsets are applied, and the result is written
// back in corner form.
kernel void FUNC(boxcoder, P)(texture2d_array<P, access::read> priorBox [[texture(0)]],
                              texture2d_array<P, access::read> priorBoxVar [[texture(1)]],
                              texture2d_array<P, access::read> targetBox [[texture(2)]],
                              texture2d_array<P, access::write> output[[texture(3)]],
                              uint3 gid [[thread_position_in_grid]]) {
    VECTOR(P, 4) p = priorBox.read(uint2(0, gid.x), gid.z);
    VECTOR(P, 4) pv = priorBoxVar.read(uint2(0, gid.x), gid.z);
    VECTOR(P, 4) t;
    t[0] = targetBox.read(uint2(0, gid.x), gid.z)[0];
    t[1] = targetBox.read(uint2(1, gid.x), gid.z)[0];
    t[2] = targetBox.read(uint2(2, gid.x), gid.z)[0];
    t[3] = targetBox.read(uint2(3, gid.x), gid.z)[0];

    // Prior box: centre and size derived from its two corners.
    P px = (p.x + p.z) / 2;
    P py = (p.y + p.w) / 2;
    P pw = p.z - p.x;
    P ph = p.w - p.y;

    // Decoded box: centre shifted by the scaled offsets, size scaled exponentially.
    P tx = pv.x * t.x * pw + px;
    P ty = pv.y * t.y * ph + py;
    P tw = exp(pv.z * t.z) * pw;
    P th = exp(pv.w * t.w) * ph;

    // Back to corner form.
    VECTOR(P, 4) r;
    r.x = tx - tw / 2;
    r.y = ty - th / 2;
    r.z = tx + tw / 2;
    r.w = ty + th / 2;

    output.write(r, gid.xy, gid.z);
}
#endif
......@@ -13,24 +13,24 @@ kernel void buffer_to_texture_kernel(
const device float *input [[buffer(0)]],
texture2d<float, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid);
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid);
}
// Copies a row-major float buffer into the first channel of a half texture.
// Element (x, y) is read from input[width * y + x]; the other three channels are written as zero.
kernel void buffer_to_texture_kernel_half(const device float *input [[buffer(0)]],
                                          texture2d<half, access::write> outTexture [[texture(0)]],
                                          uint2 gid [[thread_position_in_grid]]){
    // Threads outside the texture have no element to copy.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) {
        return;
    }

    float y = input[outTexture.get_width() * gid.y + gid.x];
    outTexture.write(half4(y, 0.0f, 0.0f, 0.0f), gid);
}
......@@ -17,104 +17,104 @@ using namespace metal;
// Rank-1 mapping from xyzn to abcd: only abcd[3] carries data (xyzn[0] * 4 + xyzn[3]);
// the three leading entries are zero.
// NOTE(review): xyzn presumably is (x, y, array slice, component lane) — confirm against callers.
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
    abcd[3] = xyzn[0] * 4 + xyzn[3];
    abcd[2] = 0;
    abcd[1] = 0;
    abcd[0] = 0;
}
// Rank-2 mapping from xyzn to abcd: abcd[2] takes xyzn[1], abcd[3] takes xyzn[0] * 4 + xyzn[3],
// and the two leading entries are zero.
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
    abcd[3] = xyzn[0] * 4 + xyzn[3];
    abcd[2] = xyzn[1];
    abcd[1] = 0;
    abcd[0] = 0;
}
// Rank-3 mapping from xyzn to abcd: abcd[1] takes xyzn[2] * 4 + xyzn[3], abcd[2] takes xyzn[1],
// abcd[3] takes xyzn[0], and abcd[0] is zero.
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
    abcd[0] = 0;
    abcd[1] = xyzn[2] * 4 + xyzn[3];
    abcd[2] = xyzn[1];
    abcd[3] = xyzn[0];
}
// Rank-4 mapping from xyzn to abcd. The flat index t = xyzn[2] * 4 + xyzn[3] is split by C into
// abcd[0] (quotient) and abcd[3] (remainder); abcd[2] takes xyzn[0] and abcd[1] takes xyzn[1].
// C must be non-zero.
inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
    abcd[2] = xyzn[0];
    abcd[1] = xyzn[1];
    uint t = xyzn[2] * 4 + xyzn[3];
    abcd[0] = t / C;
    abcd[3] = t % C;
}
// Rank-1 mapping from abcd back to xyzn; the inverse of xyzn2abcd_1
// (which computes abcd[3] = xyzn[0] * 4 + xyzn[3]).
// xyzn[0] is the quotient and xyzn[3] the remainder of abcd[3] by 4; xyzn[1] and xyzn[2] are zero.
// The remainder used to be stored in xyzn[1], which left xyzn[3] unset.
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
    xyzn[1] = xyzn[2] = 0;
    xyzn[0] = abcd[3] / 4;
    xyzn[3] = abcd[3] % 4;
}
// Rank-2 mapping from abcd back to xyzn: abcd[3] is split by 4 into xyzn[0] (quotient) and
// xyzn[3] (remainder), xyzn[1] takes abcd[2], and xyzn[2] is zero.
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
    xyzn[0] = abcd[3] / 4;
    xyzn[1] = abcd[2];
    xyzn[2] = 0;
    xyzn[3] = abcd[3] % 4;
}
// Rank-3 mapping from abcd back to xyzn: xyzn[0] takes abcd[3], xyzn[1] takes abcd[2], and
// abcd[1] is split by 4 into xyzn[2] (quotient) and xyzn[3] (remainder).
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
    const int packed = abcd[1];
    xyzn[3] = packed % 4;
    xyzn[2] = packed / 4;
    xyzn[1] = abcd[2];
    xyzn[0] = abcd[3];
}
// Rank-4 mapping from abcd back to xyzn. The flat index t = abcd[0] * C + abcd[3] is split by 4
// into xyzn[2] (quotient) and xyzn[3] (remainder); xyzn[0] takes abcd[2] and xyzn[1] takes abcd[1].
inline void abcd2xyzn_4(int C, int abcd[4], int xyzn[4]) {
    xyzn[0] = abcd[2];
    xyzn[1] = abcd[1];
    uint t = abcd[0] * C + abcd[3];
    xyzn[2] = t / 4;
    xyzn[3] = t % 4;
}
// General mapping from xyzn to abcd; performs the same computation as xyzn2abcd_4.
// The flat index t = xyzn[2] * 4 + xyzn[3] is split by C into abcd[0] (quotient) and abcd[3]
// (remainder); abcd[2] takes xyzn[0] and abcd[1] takes xyzn[1]. C must be non-zero.
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
    abcd[2] = xyzn[0];
    abcd[1] = xyzn[1];
    uint t = xyzn[2] * 4 + xyzn[3];
    abcd[0] = t / C;
    abcd[3] = t % C;
}
// General mapping from abcd back to xyzn; performs the same computation as abcd2xyzn_4.
// The flat index t = abcd[0] * C + abcd[3] is split by 4 into xyzn[2] (quotient) and xyzn[3]
// (remainder); xyzn[0] takes abcd[2] and xyzn[1] takes abcd[1].
inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
    xyzn[0] = abcd[2];
    xyzn[1] = abcd[1];
    uint t = abcd[0] * C + abcd[3];
    xyzn[2] = t / 4;
    xyzn[3] = t % 4;
}
// Flattens a 4-component position into a linear index in row-major order:
// ((a * dim[1] + b) * dim[2] + c) * dim[3] + d. dim[0] is not needed for the computation.
inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
    int32_t r = abcd[0];
    r = r * dim[1] + abcd[1];
    r = r * dim[2] + abcd[2];
    r = r * dim[3] + abcd[3];
    return r;
}
// Splits a linear row-major index into its 4-component position; the inverse of abcd2index.
// Components are peeled off from the innermost axis outwards. `ind` is consumed in the process,
// so the peel must run exactly once — a second pass would overwrite the results with values
// derived from the already-divided index.
inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
    abcd[3] = ind % dim[3]; ind /= dim[3];
    abcd[2] = ind % dim[2]; ind /= dim[2];
    abcd[1] = ind % dim[1]; ind /= dim[1];
    abcd[0] = ind;
}
// Applies the axis permutation `trans`: output component i is input component trans[i].
inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
    int axis = 0;
    while (axis < 4) {
        const int32_t source = trans[axis];
        opos[axis] = ipos[source];
        ++axis;
    }
}
// Applies the inverse of the axis permutation `trans`: input component i lands in output
// component trans[i].
inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
    int axis = 0;
    while (axis < 4) {
        const int32_t target = trans[axis];
        opos[target] = ipos[axis];
        ++axis;
    }
}
// Convolution parameters passed to the kernels in a constant buffer.
// Field order and types define the buffer layout and must stay in step with whatever fills it.
// NOTE(review): field meanings below are inferred from the names — confirm against the kernels.
struct MetalConvParam {
    short offsetX;     // presumably the signed start offset along x
    short offsetY;     // presumably the signed start offset along y
    short offsetZ;     // presumably the signed start offset along the array-slice axis
    ushort strideX;    // presumably the step between output positions along x
    ushort strideY;    // presumably the step between output positions along y
    ushort dilationX;  // presumably the spacing between kernel taps along x
    ushort dilationY;  // presumably the spacing between kernel taps along y
};
......@@ -42,73 +42,73 @@
// uint3 gid [[thread_position_in_grid]]) {
//}
// General concat kernel (one instantiation per R / N / P combination).
// Each thread produces one output texel. It starts from the texel already present in `inx`, then
// for each of the four lanes: converts the lane's position to abcd form, subtracts `offset` along
// the concat axis, walks the `vdim` sizes to find which input (j) owns that position, converts the
// position back to a texel coordinate in that input, and copies the single lane value.
// Lanes that fall before `offset` or beyond the last input keep the value read from `inx`.
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
                                      texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
                                      texture2d_array<P, access::read> in2 [[texture(2)]],
#endif
#if N >= 4
                                      texture2d_array<P, access::read> in3 [[texture(3)]],
#endif
#if N >= 5
                                      texture2d_array<P, access::read> in4 [[texture(4)]],
#endif
#if N >= 6
                                      texture2d_array<P, access::read> in5 [[texture(5)]],
#endif
                                      texture2d_array<P, access::read> inx [[texture(N)]],
                                      texture2d_array<P, access::write> out [[texture(N+1)]],
                                      constant ConcatParam & pm [[buffer(0)]],
                                      uint3 gid [[thread_position_in_grid]]) {
    // Local copy: odim[axis] is temporarily overwritten below.
    ConcatParam cp = pm;
    int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4];
    VECTOR(P, 4) r = inx.read(gid.xy, gid.z);
    for (int i = 0; i < 4; i++) {
        xyzn[3] = i;
#if R == 4
        xyzn2abcd_4(cp.odim[3], xyzn, abcd);
#else
        FUNC_R(xyzn2abcd, R)(xyzn, abcd);
#endif
        // Position along the concat axis, relative to where this kernel's inputs begin.
        int k = abcd[cp.axis] - cp.offset;
        if (k < 0) continue;
        // Find the input j whose extent contains k; k becomes the position inside that input.
        int j = 0;
        for (; j < N; j++) {
            if (k < cp.vdim[j]) {
                break;
            }
            k -= cp.vdim[j];
        }
        if (j == N) {
            continue;
        }
        // Swap in the chosen input's extent along the axis for the reverse mapping, then restore it.
        int ta = cp.odim[cp.axis];
        abcd[cp.axis] = k;
        cp.odim[cp.axis] = cp.vdim[j];
#if R == 4
        abcd2xyzn_4(cp.odim[3], abcd, oxyzn);
#else
        FUNC_R(abcd2xyzn, R)(abcd, oxyzn);
#endif
        cp.odim[cp.axis] = ta;
        switch (j) {
            case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
            case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#if N >= 3
            case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 4
            case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 5
            case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 6
            case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
        }
    }
    out.write(r, gid.xy, gid.z);
}
#endif // V == NORMAL
......@@ -117,66 +117,66 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
#if V == VX
// Concat along the texture x axis (one instantiation per R / N / P combination).
// Each thread copies one whole texel: x = gid.x - offset is compared against the running sum of
// the `vdim` widths to pick the input, and the texel is copied from that input at column x.
// Threads with x < 0, or past the last input, write nothing.
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
                                      texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
                                      texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
                                      texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
                                      texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
                                      texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
                                      texture2d_array<P, access::write> out [[texture(N)]],
                                      constant ConcatParam & pm [[buffer(0)]],
                                      uint3 gid [[thread_position_in_grid]]) {
    int x = gid.x - pm.offset;
    if (x < 0) return;
    if (x < pm.vdim[0]) {
        // NOTE(review): in0 is read at gid.xy while every other input is read at column x; the
        // two only agree when offset is 0 — confirm offset is always 0 for this variant.
        VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
    x -= pm.vdim[0];
    if (x < pm.vdim[1]) {
        VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#if N >= 3
    x -= pm.vdim[1];
    if (x < pm.vdim[2]) {
        VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 3
#if N >= 4
    x -= pm.vdim[2];
    if (x < pm.vdim[3]) {
        VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 4
#if N >= 5
    x -= pm.vdim[3];
    if (x < pm.vdim[4]) {
        VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 5
#if N >= 6
    x -= pm.vdim[4];
    if (x < pm.vdim[5]) {
        VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
        out.write(r, gid.xy, gid.z);
        return;
    }
#endif // N >= 6
}
#endif // V == VX
......@@ -199,50 +199,50 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int y = gid.y - pm.offset;
if (y < 0) return;
if (y < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[0];
if (y < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
int y = gid.y - pm.offset;
if (y < 0) return;
if (y < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[0];
if (y < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
y -= pm.vdim[1];
if (y < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[1];
if (y < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
y -= pm.vdim[2];
if (y < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[2];
if (y < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
y -= pm.vdim[3];
if (y < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[3];
if (y < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
y -= pm.vdim[4];
if (y < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[4];
if (y < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VY
......@@ -265,50 +265,50 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int z = gid.z - pm.offset;
if (z < 0) return;
if (z < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[0];
if (z < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
int z = gid.z - pm.offset;
if (z < 0) return;
if (z < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[0];
if (z < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
z -= pm.vdim[1];
if (z < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[1];
if (z < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
z -= pm.vdim[2];
if (z < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[2];
if (z < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
z -= pm.vdim[3];
if (z < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[3];
if (z < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
z -= pm.vdim[4];
if (z < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[4];
if (z < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VZ
......
......@@ -18,11 +18,11 @@
using namespace metal;
// Parameters for the concat kernels, bound as a constant buffer.
// Field order and sizes define the buffer layout (16 x int32_t) and must not change.
struct ConcatParam {
    int32_t odim[4];   // output dimensions in abcd order; odim[3] is the divisor in the rank-4 mapping
    int32_t axis;      // index into abcd / odim of the axis being concatenated
    int32_t offset;    // subtracted from the position along the concat axis before choosing an input
    int32_t trans[4];  // NOTE(review): not read by the concat kernels visible here — presumably an axis permutation
    int32_t vdim[6];   // extent of each input (up to six) along the concat axis
};
#define VNORMAL 1
......@@ -41,129 +41,129 @@ struct ConcatParam {
// ssd-ar: (R=3, N=5, V=x)
#define V VX
#define R 3
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 3
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=2, N=5, V=x)
#define V VX
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 3
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=4, N=3, V=z)
#define V VZ
#define R 4
#define N 3
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 4
#define N 3
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=2, N=6, V=y)
#define V VY
#define R 2
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 2
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=3, N=6, V=y)
#define V VY
#define R 3
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 3
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VNORMAL
#define R 4
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 4
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 2
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
......
......@@ -18,45 +18,45 @@ using namespace metal;
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
......
......@@ -16,17 +16,17 @@
using namespace metal;
// Transposed-convolution parameters passed to the kernels in a constant buffer.
// Field order and types define the buffer layout (8 x ushort) and must not change.
// NOTE(review): field meanings below are inferred from the names — confirm against the kernels.
struct MetalConvTransposeParam{
    ushort kernelW;    // presumably the kernel width
    ushort kernelH;    // presumably the kernel height
    ushort strideX;    // presumably the stride along x
    ushort strideY;    // presumably the stride along y
    ushort paddingX;   // presumably the padding along x
    ushort paddingY;   // presumably the padding along y
    ushort dilationX;  // presumably the dilation along x
    ushort dilationY;  // presumably the dilation along y
};
// Transposed convolution specialised for a 2x2 kernel with stride 2 (float textures).
//
// One thread handles one output texel (gid.xy) of one output slice (gid.z).
// Output texel (x, y) reads input texel (x / 2, y / 2) and uses the kernel tap selected
// by (x % 2, y % 2). For every input slice the four output components are accumulated
// as dot products between the sampled input texel and four weight vectors.
//
// Weight indexing used below, in float4 units:
//   gid.z * input_array_size * 16          -> block for this output slice
//   + component * input_array_size * 4     -> output component (0..3)
//   + kernel_index * input_array_size      -> kernel tap (0..3)
//   + i                                    -> input slice
//
// `param` is accepted for binding compatibility but is not read by this kernel.
kernel void conv_transpose2x2_stride2(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                      // NOTE(review): this parameter line was elided from the text this
                                      // block was recovered from; it is restored to mirror
                                      // conv_transpose2x2_stride2_half and the body's use of outTexture — confirm.
                                      texture2d_array<float, access::write> outTexture [[texture(1)]],
                                      constant MetalConvTransposeParam &param [[buffer(0)]],
                                      const device float4 *weights [[buffer(1)]],
                                      uint3 gid [[thread_position_in_grid]]) {
    // Threads dispatched outside the output extent have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    int input_array_size = inTexture.get_array_size();

    // Which of the four kernel taps this output texel corresponds to.
    int kernel_index_x = gid.x % 2;
    int kernel_index_y = gid.y % 2;
    int kernel_index = kernel_index_y * 2 + kernel_index_x;
    int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);

    // Source texel in the (half-resolution) input.
    int input_x = gid.x / 2;
    int input_y = gid.y / 2;

    // Pixel coordinates, nearest filtering, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    float4 output = float4(0.0);
    for (int i = 0; i < input_array_size; ++i) {
        float4 input = inTexture.sample(sample, float2(input_x, input_y), i);

        float4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
        float4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
        float4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
        float4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];

        output.x += dot(input, kernel_slice0);
        output.y += dot(input, kernel_slice1);
        output.z += dot(input, kernel_slice2);
        output.w += dot(input, kernel_slice3);
    }

    outTexture.write(output, gid.xy, gid.z);
}
// Transposed convolution specialised for a 2x2 kernel with stride 2 (half textures).
//
// One thread handles one output texel (gid.xy) of one output slice (gid.z).
// Output texel (x, y) reads input texel (x / 2, y / 2) and uses the kernel tap selected
// by (x % 2, y % 2). Inputs and weights are half4, but every dot product and the
// running sum are evaluated in float; the result is converted to half4 only on write.
//
// Weight indexing used below, in half4 units:
//   gid.z * input_array_size * 16          -> block for this output slice
//   + component * input_array_size * 4     -> output component (0..3)
//   + kernel_index * input_array_size      -> kernel tap (0..3)
//   + i                                    -> input slice
//
// `param` is accepted for binding compatibility but is not read by this kernel.
kernel void conv_transpose2x2_stride2_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                                           texture2d_array<half, access::write> outTexture [[texture(1)]],
                                           constant MetalConvTransposeParam &param [[buffer(0)]],
                                           const device half4 *weights [[buffer(1)]],
                                           uint3 gid [[thread_position_in_grid]]) {
    // Threads dispatched outside the output extent have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    int input_array_size = inTexture.get_array_size();

    // Which of the four kernel taps this output texel corresponds to.
    int kernel_index_x = gid.x % 2;
    int kernel_index_y = gid.y % 2;
    int kernel_index = kernel_index_y * 2 + kernel_index_x;
    int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);

    // Source texel in the (half-resolution) input.
    int input_x = gid.x / 2;
    int input_y = gid.y / 2;

    // Pixel coordinates, nearest filtering, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    // Accumulate in float even though the data is half.
    float4 output = float4(0.0);
    for (int i = 0; i < input_array_size; ++i) {
        half4 input = inTexture.sample(sample, float2(input_x, input_y), i);

        half4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
        half4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
        half4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
        half4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];

        output.x += dot(float4(input), float4(kernel_slice0));
        output.y += dot(float4(input), float4(kernel_slice1));
        output.z += dot(float4(input), float4(kernel_slice2));
        output.w += dot(float4(input), float4(kernel_slice3));
    }

    outTexture.write(half4(output), gid.xy, gid.z);
}
//kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
......
......@@ -18,13 +18,13 @@
using namespace metal;
// Parameter block for the elementwise-add kernels, bound at buffer(0).
// Each member is declared exactly once; duplicated member declarations are a
// redeclaration error and would also double the struct's size.
// NOTE(review): the layout presumably has to match the host-side struct — confirm.
struct ElementwiseAddParam {
    // 1 selects the fast path: X and Y are read at the same texture coordinate.
    int32_t fast;
    // First index of the X tensor index that is copied into the Y index.
    int32_t axis;
    // Number of consecutive indices copied, starting at `axis`.
    int32_t ylen;
    // X dimensions; the kernels pass xdim[3] to xyzn2abcd.
    int32_t xdim[4];
    // Permutation handed to invtrans for X.
    int32_t xtrans[4];
    // Y dimensions; the kernels pass ydim[3] to abcd2xyzn.
    int32_t ydim[4];
    // Permutation handed to trans for Y.
    int32_t ytrans[4];
};
// Elementwise addition of two float texture arrays: out = X + Y.
//
// One thread handles one output texel (gid.xy) of one slice (gid.z).
// - Fast path (pm.fast == 1): X and Y are read at the same coordinate.
// - General path: for each of the four components of the X texel, the matching Y
//   element is located by converting the X coordinate with xyzn2abcd / invtrans,
//   copying indices [axis, axis + ylen) into the tail of a zeroed Y index, and
//   converting back with trans / abcd2xyzn.
//   NOTE(review): those four helpers are defined elsewhere; presumably they map between
//   (x, y, slice, component) texture coordinates and 4-D tensor indices — confirm.
kernel void elementwise_add(texture2d_array<float, access::read> inputX [[texture(0)]],
                            // NOTE(review): this parameter line was elided from the text this block
                            // was recovered from; it is restored to mirror elementwise_add_half and
                            // the body's use of inputY — confirm.
                            texture2d_array<float, access::read> inputY [[texture(1)]],
                            texture2d_array<float, access::write> outTexture [[texture(2)]],
                            constant ElementwiseAddParam &pm [[buffer(0)]],
                            uint3 gid [[thread_position_in_grid]]) {
    // Threads dispatched outside the output extent have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    float4 rx, ry;

    if (pm.fast == 1) {
        rx = inputX.read(gid.xy, gid.z);
        ry = inputY.read(gid.xy, gid.z);
    } else {
        rx = inputX.read(gid.xy, gid.z);

        int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
        int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
        // Local copies of the permutations in the parameter block.
        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
        // Offset that lands indices [axis, axis + ylen) on the last ylen slots of y_abcd.
        int32_t yshift = 4 - pm.ylen - pm.axis;

        // Resolve each of the four components of the X texel separately.
        for (int n = 0; n < 4; n++) {
            x_xyzn[3] = n;
            xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
            invtrans(xtrans, x_abcd, t_abcd);
            for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
                y_abcd[yshift+k] = t_abcd[k];
            }
            trans(ytrans, y_abcd, t_abcd);
            abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
            ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
        }
    }

    float4 r = rx + ry;
    outTexture.write(r, gid.xy, gid.z);
}
// Elementwise addition of two half texture arrays: out = X + Y.
//
// One thread handles one output texel (gid.xy) of one slice (gid.z).
// - Fast path (pm.fast == 1): X and Y are read at the same coordinate.
// - General path: for each of the four components of the X texel, the matching Y
//   element is located by converting the X coordinate with xyzn2abcd / invtrans,
//   copying indices [axis, axis + ylen) into the tail of a zeroed Y index, and
//   converting back with trans / abcd2xyzn.
//   NOTE(review): those four helpers are defined elsewhere; presumably they map between
//   (x, y, slice, component) texture coordinates and 4-D tensor indices — confirm.
kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[texture(0)]],
                                 texture2d_array<half, access::read> inputY [[texture(1)]],
                                 texture2d_array<half, access::write> outTexture [[texture(2)]],
                                 constant ElementwiseAddParam &pm [[buffer(0)]],
                                 uint3 gid [[thread_position_in_grid]]) {
    // Threads dispatched outside the output extent have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    half4 rx, ry;

    if (pm.fast == 1) {
        rx = inputX.read(gid.xy, gid.z);
        ry = inputY.read(gid.xy, gid.z);
    } else {
        rx = inputX.read(gid.xy, gid.z);

        int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
        int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
        // Local copies of the permutations in the parameter block.
        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
        // Offset that lands indices [axis, axis + ylen) on the last ylen slots of y_abcd.
        int32_t yshift = 4 - pm.ylen - pm.axis;

        // Resolve each of the four components of the X texel separately.
        for (int n = 0; n < 4; n++) {
            x_xyzn[3] = n;
            xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
            invtrans(xtrans, x_abcd, t_abcd);
            for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
                y_abcd[yshift+k] = t_abcd[k];
            }
            trans(ytrans, y_abcd, t_abcd);
            abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
            ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
        }
    }

    half4 r = rx + ry;
    outTexture.write(r, gid.xy, gid.z);
}
......@@ -20,72 +20,72 @@
using namespace metal;
// Kernel template: elementwise add followed by PReLU, instantiated by textual inclusion.
//
// The including file defines P (element type), PRELU_TYPE (used by FUNC3_ to build the
// kernel name) and exactly one of PRELU_CHANNEL / PRELU_ELEMENT / PRELU_OTHER, which
// selects both the type of the `alpha` buffer and how alpha is indexed:
//   PRELU_CHANNEL : VECTOR(P, 4) alpha, indexed by slice (gid.z)
//   PRELU_ELEMENT : VECTOR(P, 4) alpha, indexed by (y * width + x) * array_size + gid.z
//   PRELU_OTHER   : scalar P alpha, alpha[0] applied to every component
//
// The add itself matches the plain elementwise-add kernels: a same-coordinate fast path
// when pm.fast == 1, otherwise a per-component lookup of the Y element through
// xyzn2abcd / invtrans / trans / abcd2xyzn.
// NOTE(review): those helpers and the FUNC3_ / VECTOR macros are defined elsewhere —
// confirm their exact semantics there.
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
                                                   texture2d_array<P, access::read> inputY [[texture(1)]],
                                                   texture2d_array<P, access::write> outTexture [[texture(2)]],
                                                   constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL
                                                   const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_ELEMENT
                                                   const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_OTHER
                                                   const device P *alpha [[buffer(1)]],
#endif
                                                   uint3 gid [[thread_position_in_grid]]) {
    // Threads dispatched outside the output extent have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    VECTOR(P, 4) rx, ry;

    if (pm.fast == 1) {
        rx = inputX.read(gid.xy, gid.z);
        ry = inputY.read(gid.xy, gid.z);
    } else {
        rx = inputX.read(gid.xy, gid.z);

        int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
        int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
        // Local copies of the permutations in the parameter block.
        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
        // Offset that lands indices [axis, axis + ylen) on the last ylen slots of y_abcd.
        int32_t yshift = 4 - pm.ylen - pm.axis;

        // Resolve each of the four components of the X texel separately.
        for (int n = 0; n < 4; n++) {
            x_xyzn[3] = n;
            xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
            invtrans(xtrans, x_abcd, t_abcd);
            for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
                y_abcd[yshift+k] = t_abcd[k];
            }
            trans(ytrans, y_abcd, t_abcd);
            abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
            ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
        }
    }

    VECTOR(P, 4) output = rx + ry;

    // PReLU: positive components pass through, the others are scaled by alpha.
#ifdef PRELU_CHANNEL
    VECTOR(P, 4) alpha_value = alpha[gid.z];
    output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
    output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
    output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
    output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
    int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
    VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
    output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
    output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
    output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
    output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
    P alpha_value = alpha[0];
    output.x = output.x > 0 ? output.x : (alpha_value * output.x);
    output.y = output.y > 0 ? output.y : (alpha_value * output.y);
    output.z = output.z > 0 ? output.z : (alpha_value * output.z);
    output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif

    outTexture.write(output, gid.xy, gid.z);
}
#endif
......@@ -31,7 +31,7 @@ using namespace metal;
// Placeholder fetch kernel for float textures.
// It declares a read-only input texture array at texture(0), a float output buffer at
// buffer(0) and the thread position, but its body is empty: nothing is read from
// inTexture and nothing is written to output.
// NOTE(review): presumably kept so that a pipeline with this name can be created when no
// real fetch work is needed — confirm against the host-side caller.
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册