提交 005115a1 编写于 作者: L liuruilong

format files, improve accuracy

上级 93a1705a
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="4MS-jc-i6A"> <document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait"> <device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/> <adaptation id="fullscreen"/>
</device> </device>
......
...@@ -30,37 +30,37 @@ class MultiPredictViewController: UIViewController { ...@@ -30,37 +30,37 @@ class MultiPredictViewController: UIViewController {
@IBAction func predictAct(_ sender: Any) { @IBAction func predictAct(_ sender: Any) {
let success = self.runner2.load() let success = self.runner2.load()
// DispatchQueue.global().async { // DispatchQueue.global().async {
let image1 = UIImage.init(named: "hand.jpg") let image1 = UIImage.init(named: "hand.jpg")
// let success = self.runner2.load() // let success = self.runner2.load()
// if success { // if success {
// for i in 0..<10000 { // for i in 0..<10000 {
// print(i) // print(i)
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in // self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result1: ") // print("result1: ")
//// print(res) //// print(res)
// }) // })
// } // }
// } else { // } else {
// print("load failed") // print("load failed")
// } // }
// self.runner1.clear() // self.runner1.clear()
// } // }
// return // return
// DispatchQueue.global().async { // DispatchQueue.global().async {
//// sleep(1) //// sleep(1)
// let image1 = UIImage.init(named: "banana.jpeg") // let image1 = UIImage.init(named: "banana.jpeg")
//// if success { //// if success {
// for _ in 0..<10 { // for _ in 0..<10 {
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in // self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result2: ") // print("result2: ")
// print(res) // print(res)
// }) // })
// } // }
//// } else { //// } else {
//// print("load failed") //// print("load failed")
//// } //// }
//// self.runner2.clear() //// self.runner2.clear()
// } // }
} }
} }
...@@ -57,8 +57,8 @@ public class MobileNet: Net{ ...@@ -57,8 +57,8 @@ public class MobileNet: Net{
except = 0 except = 0
modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
// metalLoadMode = .LoadMetalInCustomMetalLib // metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil " // metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetPreProccess.init(device: device) preprocessKernel = MobilenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 224, 224, 3]) inputDim = Dim.init(inDim: [1, 224, 224, 3])
metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
......
...@@ -51,45 +51,45 @@ public class MobileNet_ssd_hand: Net { ...@@ -51,45 +51,45 @@ public class MobileNet_ssd_hand: Net {
override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] { override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] {
// guard let interRes = paddleMobileRes.intermediateResults else { // guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ") // fatalError(" need have inter result ")
// } // }
// //
// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else { // guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else {
// fatalError(" need score ") // fatalError(" need score ")
// } // }
// //
// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else { // guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else {
// fatalError() // fatalError()
// } // }
// //
// var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3])) // var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3]))
//// print("score: ") //// print("score: ")
//// print(scoreFormatArr.strideArray()) //// print(scoreFormatArr.strideArray())
//// ////
// var bboxArr = bbox.metalTexture.float32Array() // var bboxArr = bbox.metalTexture.float32Array()
//// print("bbox: ") //// print("bbox: ")
//// print(bboxArr.strideArray()) //// print(bboxArr.strideArray())
// //
// let nmsCompute = NMSCompute.init() // let nmsCompute = NMSCompute.init()
// nmsCompute.scoreThredshold = 0.01 // nmsCompute.scoreThredshold = 0.01
// nmsCompute.nmsTopK = 400 // nmsCompute.nmsTopK = 400
// nmsCompute.keepTopK = 200 // nmsCompute.keepTopK = 200
// nmsCompute.nmsEta = 1.0 // nmsCompute.nmsEta = 1.0
// nmsCompute.nmsThreshold = 0.45 // nmsCompute.nmsThreshold = 0.45
// nmsCompute.background_label = 0; // nmsCompute.background_label = 0;
// //
// nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])] // nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])]
// //
// nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])] // nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])]
// guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else { // guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else {
// fatalError( " result error " ) // fatalError( " result error " )
// } // }
// //
// let output: [Float32] = result.map { $0.floatValue } // let output: [Float32] = result.map { $0.floatValue }
// //
// //
// return output // return output
fatalError() fatalError()
} }
......
...@@ -49,104 +49,104 @@ public class MobileNet_ssd_AR: Net { ...@@ -49,104 +49,104 @@ public class MobileNet_ssd_AR: Net {
override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] { override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] {
fatalError() fatalError()
// guard let interRes = paddleMobileRes.intermediateResults else { // guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ") // fatalError(" need have inter result ")
// } // }
// //
// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else { // guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
// fatalError(" need score ") // fatalError(" need score ")
// } // }
// //
// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else { // guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
// fatalError() // fatalError()
// } // }
// let startDate = Date.init() // let startDate = Date.init()
// print("scoreFormatArr: ") // print("scoreFormatArr: ")
//print((0..<score.capacity).map{ score.result[$0] }.strideArray()) //print((0..<score.capacity).map{ score.result[$0] }.strideArray())
// //
// print("bbox arr: ") // print("bbox arr: ")
// //
// print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray()) // print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
// let nmsCompute = NMSCompute.init() // let nmsCompute = NMSCompute.init()
// nmsCompute.scoreThredshold = 0.25 // nmsCompute.scoreThredshold = 0.25
// nmsCompute.nmsTopK = 100 // nmsCompute.nmsTopK = 100
// nmsCompute.keepTopK = 100 // nmsCompute.keepTopK = 100
// nmsCompute.nmsEta = 1.0 // nmsCompute.nmsEta = 1.0
// nmsCompute.nmsThreshold = 0.449999988 // nmsCompute.nmsThreshold = 0.449999988
// nmsCompute.background_label = 0; // nmsCompute.background_label = 0;
// nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])] // nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
// nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])] // nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
// guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else { // guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
// fatalError( " result error " ) // fatalError( " result error " )
// } // }
// let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize)) // let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
// for i in 0..<Int(result.outputSize) { // for i in 0..<Int(result.outputSize) {
// //
// print("i \(i) : \(result.output[i])") // print("i \(i) : \(result.output[i])")
// } // }
// print(Date.init().timeIntervalSince(startDate)) // print(Date.init().timeIntervalSince(startDate))
// print(resultHolder.result![0]) // print(resultHolder.result![0])
// return resultHolder // return resultHolder
} }
// override func updateProgram(program: Program) { // override func updateProgram(program: Program) {
// for i in [56, 66, 76, 86, 93, 99] { // for i in [56, 66, 76, 86, 93, 99] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first! // let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]! // let v = program.scope[output]!
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
// //
// originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7]) // originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
// //
// originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7]) // originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
// //
// program.scope[output] = originTexture // program.scope[output] = originTexture
// //
// if i == 99 { // if i == 99 {
// opDesc.attrs["axis"] = 0 // opDesc.attrs["axis"] = 0
// } else { // } else {
// opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } // opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
// } // }
// } // }
// //
// for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] { // for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first! // let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]! // let v = program.scope[output]!
// //
// //
// //
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } // opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
// } // }
// //
// for i in [60, 101, 90, 97, 70, 80] { // for i in [60, 101, 90, 97, 70, 80] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first! // let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]! // let v = program.scope[output]!
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 // opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
// } // }
// //
// for i in [102] { // for i in [102] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// for output in opDesc.outputs["Out"]! { // for output in opDesc.outputs["Out"]! {
// let v = program.scope[output]! // let v = program.scope[output]!
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// } // }
// opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 // opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
// print(" split axis \(opDesc.attrs["axis"])") // print(" split axis \(opDesc.attrs["axis"])")
// } // }
// 99 // 99
// } // }
} }
...@@ -14,7 +14,7 @@ import AVFoundation ...@@ -14,7 +14,7 @@ import AVFoundation
/** /**
Simple interface to the iPhone's camera. Simple interface to the iPhone's camera.
*/ */
@available(iOS 10.0, *) @available(iOS 10.0, *)
public class VideoCapture: NSObject { public class VideoCapture: NSObject {
public var previewLayer: AVCaptureVideoPreviewLayer? public var previewLayer: AVCaptureVideoPreviewLayer?
......
...@@ -89,21 +89,21 @@ class ViewController: UIViewController { ...@@ -89,21 +89,21 @@ class ViewController: UIViewController {
@IBAction func loadAct(_ sender: Any) { @IBAction func loadAct(_ sender: Any) {
runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue) runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue)
if platform == .GPU { if platform == .GPU {
// let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil) // let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil)
// let fileReader = try! FileReader.init(paramPath: filePath!) // let fileReader = try! FileReader.init(paramPath: filePath!)
// let pointer: UnsafeMutablePointer<Float32> = fileReader.read() // let pointer: UnsafeMutablePointer<Float32> = fileReader.read()
// //
// //
// let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared) // let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared)
// //
// buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize) // buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize)
if self.toPredictTexture == nil { if self.toPredictTexture == nil {
// runner.getTexture(inBuffer: buffer!) { [weak self] (texture) in // runner.getTexture(inBuffer: buffer!) { [weak self] (texture) in
// self?.toPredictTexture = texture // self?.toPredictTexture = texture
// } // }
runner.getTexture(image: selectImage!.cgImage!) { [weak self] (texture) in runner.getTexture(image: selectImage!.cgImage!) { [weak self] (texture) in
self?.toPredictTexture = texture self?.toPredictTexture = texture
...@@ -171,42 +171,19 @@ class ViewController: UIViewController { ...@@ -171,42 +171,19 @@ class ViewController: UIViewController {
override func viewDidLoad() { override func viewDidLoad() {
super.viewDidLoad() super.viewDidLoad()
GlobalConfig.shared.computePrecision = .Float16
GlobalConfig.shared.debug = false
modelPickerView.delegate = self modelPickerView.delegate = self
modelPickerView.dataSource = self modelPickerView.dataSource = self
threadPickerView.delegate = self threadPickerView.delegate = self
threadPickerView.dataSource = self threadPickerView.dataSource = self
if let image = UIImage.init(named: "classify-img-output.png") { if let image = UIImage.init(named: "00001.jpg") {
selectImage = image selectImage = image
selectImageView.image = image selectImageView.image = image
} else { } else {
print("请添加测试图片") print("请添加测试图片")
} }
GlobalConfig.shared.computePrecision = .Float32
// if platform == .CPU {
// inputPointer = runner.preproccess(image: selectImage!.cgImage!)
// } else if platform == .GPU {
// runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// self?.toPredictTexture = texture
// }
// } else {
// fatalError( " unsupport " )
// }
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
} }
} }
......
...@@ -326,9 +326,10 @@ ...@@ -326,9 +326,10 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO; ALWAYS_SEARCH_USER_PATHS = NO;
IPHONEOS_DEPLOYMENT_TARGET = 12.1; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES; MTL_FAST_MATH = YES;
MTL_LANGUAGE_REVISION = Metal12;
SDKROOT = iphoneos; SDKROOT = iphoneos;
}; };
name = Debug; name = Debug;
...@@ -337,9 +338,10 @@ ...@@ -337,9 +338,10 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO; ALWAYS_SEARCH_USER_PATHS = NO;
IPHONEOS_DEPLOYMENT_TARGET = 12.1; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
MTL_ENABLE_DEBUG_INFO = NO; MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES; MTL_FAST_MATH = YES;
MTL_LANGUAGE_REVISION = Metal12;
SDKROOT = iphoneos; SDKROOT = iphoneos;
}; };
name = Release; name = Release;
......
...@@ -41,129 +41,129 @@ struct ConcatParam { ...@@ -41,129 +41,129 @@ struct ConcatParam {
// ssd-ar: (R=3, N=5, V=x) // ssd-ar: (R=3, N=5, V=x)
#define V VX #define V VX
#define R 3 #define R 3
#define N 5 #define N 5
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd-ar: (R=2, N=5, V=x) // ssd-ar: (R=2, N=5, V=x)
#define V VX #define V VX
#define R 2 #define R 2
#define N 5 #define N 5
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd-ar: (R=3, N=2, V=y) // ssd-ar: (R=3, N=2, V=y)
#define V VY #define V VY
#define R 3 #define R 3
#define N 2 #define N 2
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd-ar: (R=4, N=3, V=z) // ssd-ar: (R=4, N=3, V=z)
#define V VZ #define V VZ
#define R 4 #define R 4
#define N 3 #define N 3
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd: (R=2, N=6, V=y) // ssd: (R=2, N=6, V=y)
#define V VY #define V VY
#define R 2 #define R 2
#define N 6 #define N 6
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd: (R=3, N=6, V=y) // ssd: (R=3, N=6, V=y)
#define V VY #define V VY
#define R 3 #define R 3
#define N 6 #define N 6
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
#define V VNORMAL #define V VNORMAL
#define R 4 #define R 4
#define N 2 #define N 2
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
#define V VY #define V VY
#define R 2 #define R 2
#define N 2 #define N 2
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
#define V VY #define V VY
#define R 2 #define R 2
#define N 5 #define N 5
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
......
...@@ -57,7 +57,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text ...@@ -57,7 +57,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w); output.w += dot(input, weight_w);
} }
// output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -125,7 +125,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text ...@@ -125,7 +125,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
// output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -183,7 +183,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text ...@@ -183,7 +183,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
// output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -242,7 +242,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text ...@@ -242,7 +242,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
// output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -283,7 +283,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText ...@@ -283,7 +283,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText
output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
} }
// output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -312,25 +312,25 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -312,25 +312,25 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size(); uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z]; float4 output = float4(biase[gid.z]);
half4 input; float4 input;
for (uint i = 0; i < input_arr_size; ++i) { for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); input = float4(inTexture.sample(sample, float2(posInInput.x, posInInput.y), i));
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; float4 weight_x = float4(weights[weithTo + 0 * kernelHXW * input_arr_size + i]);
output.x += dot(input, weight_x); output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; float4 weight_y = float4(weights[weithTo + 1 * kernelHXW * input_arr_size + i]);
output.y += dot(input, weight_y); output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; float4 weight_z = float4(weights[weithTo + 2 * kernelHXW * input_arr_size + i]);
output.z += dot(input, weight_z); output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; float4 weight_w = float4(weights[weithTo + 3 * kernelHXW * input_arr_size + i]);
output.w += dot(input, weight_w); output.w += dot(input, weight_w);
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(half4(output), gid.xy, gid.z);
} }
kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]], kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
...@@ -384,7 +384,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -384,7 +384,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
output.w += dot(float4(input[j]), float4(weight_w)); output.w += dot(float4(input[j]), float4(weight_w));
} }
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -424,7 +424,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in ...@@ -424,7 +424,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -483,7 +483,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -483,7 +483,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -542,7 +542,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -542,7 +542,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
......
...@@ -67,7 +67,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -67,7 +67,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl
output.w += dot(input, weight_w); output.w += dot(input, weight_w);
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL #ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z]; VECTOR(P, 4) alpha_value = alpha[gid.z];
...@@ -166,7 +166,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -166,7 +166,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL #ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z]; VECTOR(P, 4) alpha_value = alpha[gid.z];
......
...@@ -18,45 +18,45 @@ using namespace metal; ...@@ -18,45 +18,45 @@ using namespace metal;
#define P float #define P float
#define PRELU_CHANNEL prelu_channel #define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel #define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_CHANNEL #undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element #define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element #define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_ELEMENT #undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other #define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other #define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_OTHER #undef PRELU_OTHER
#undef P #undef P
#define P half #define P half
#define PRELU_CHANNEL prelu_channel #define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel #define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_CHANNEL #undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element #define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element #define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_ELEMENT #undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other #define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other #define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_OTHER #undef PRELU_OTHER
#undef P #undef P
......
...@@ -58,7 +58,7 @@ kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[text ...@@ -58,7 +58,7 @@ kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[text
} }
int input_width = inTexture.get_width(); int input_width = inTexture.get_width();
// int input_height = inTexture.get_height(); // int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z); const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input; output[gid.y * input_width + gid.x] = input;
} }
...@@ -73,7 +73,7 @@ kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[ ...@@ -73,7 +73,7 @@ kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[
} }
int input_width = inTexture.get_width(); int input_width = inTexture.get_width();
// int input_height = inTexture.get_height(); // int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z); const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = float4(input); output[gid.y * input_width + gid.x] = float4(input);
} }
......
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
using namespace metal; using namespace metal;
struct resize_bilinear_param { struct resize_bilinear_param {
// int32_t out_h; // int32_t out_h;
// int32_t out_w; // int32_t out_w;
float ratio_h; float ratio_h;
float ratio_w; float ratio_w;
}; };
......
...@@ -27,7 +27,7 @@ kernel void FUNC(softmax, P)(texture2d_array<P, access::read> inTexture [[textur ...@@ -27,7 +27,7 @@ kernel void FUNC(softmax, P)(texture2d_array<P, access::read> inTexture [[textur
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return; gid.z >= outTexture.get_array_size()) return;
// int zsize = inTexture.get_array_size(); // int zsize = inTexture.get_array_size();
P maxv = inTexture.read(uint2(0, gid.y), 0)[0]; P maxv = inTexture.read(uint2(0, gid.y), 0)[0];
int group = sp.K / 4; int group = sp.K / 4;
int remain = sp.K % 4; int remain = sp.K % 4;
......
...@@ -36,29 +36,29 @@ struct SplitParam { ...@@ -36,29 +36,29 @@ struct SplitParam {
//// ssd-ar: (R=3, N=2, V=y) //// ssd-ar: (R=3, N=2, V=y)
#define V VY #define V VY
#define R 3 #define R 3
#define N 2 #define N 2
#define P float #define P float
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#define P half #define P half
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
//// ssd-ar: (R=2, N=2, V=y) //// ssd-ar: (R=2, N=2, V=y)
#define V VY #define V VY
#define R 2 #define R 2
#define N 2 #define N 2
#define P float #define P float
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#define P half #define P half
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
...@@ -36,28 +36,28 @@ kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[ ...@@ -36,28 +36,28 @@ kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[
} }
#define R 4 #define R 4
#define P float #define P float
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#undef R #undef R
#define R 3 #define R 3
#define P float #define P float
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#undef R #undef R
#define R 2 #define R 2
#define P float #define P float
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#undef R #undef R
...@@ -27,8 +27,8 @@ class ViewController: UIViewController { ...@@ -27,8 +27,8 @@ class ViewController: UIViewController {
inQueue: queue inQueue: queue
) )
test.testConcat() test.testConcat()
// test.testReshape() // test.testReshape()
// test.testTranspose() // test.testTranspose()
print(" done ") print(" done ")
} }
......
...@@ -741,7 +741,7 @@ ...@@ -741,7 +741,7 @@
CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEFINES_MODULE = YES; DEFINES_MODULE = YES;
DEVELOPMENT_TEAM = ""; DEVELOPMENT_TEAM = A798K58VVL;
DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1; DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath"; DYLIB_INSTALL_NAME_BASE = "@rpath";
...@@ -778,7 +778,7 @@ ...@@ -778,7 +778,7 @@
CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEFINES_MODULE = YES; DEFINES_MODULE = YES;
DEVELOPMENT_TEAM = ""; DEVELOPMENT_TEAM = A798K58VVL;
DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1; DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath"; DYLIB_INSTALL_NAME_BASE = "@rpath";
......
...@@ -35,4 +35,6 @@ import Foundation ...@@ -35,4 +35,6 @@ import Foundation
/// 运算精度, runner 生命周期中不可变 /// 运算精度, runner 生命周期中不可变
@objc public var computePrecision: ComputePrecision = .Float16 @objc public var computePrecision: ComputePrecision = .Float16
/// 是否开启 log
@objc public var debug: Bool = true;
} }
...@@ -377,8 +377,8 @@ extension MTLComputeCommandEncoder { ...@@ -377,8 +377,8 @@ extension MTLComputeCommandEncoder {
let height = computePipline.maxTotalThreadsPerThreadgroup/width let height = computePipline.maxTotalThreadsPerThreadgroup/width
let threadsPerGroup = MTLSize.init(width: width, height: height, depth: 1) let threadsPerGroup = MTLSize.init(width: width, height: height, depth: 1)
// print(" thread: threads per group: \(threadsPerGroup) ") // print(" thread: threads per group: \(threadsPerGroup) ")
// print(" thread: out texture width: \(outTexture.width) , out texture height: \(outTexture.height)") // print(" thread: out texture width: \(outTexture.width) , out texture height: \(outTexture.height)")
let groupWidth = (outTexture.width + width - 1)/width let groupWidth = (outTexture.width + width - 1)/width
let groupHeight = (outTexture.height + height - 1)/height let groupHeight = (outTexture.height + height - 1)/height
...@@ -547,9 +547,9 @@ public extension MTLTexture { ...@@ -547,9 +547,9 @@ public extension MTLTexture {
} }
func realNHWC(dim: (n: Int, h: Int, w: Int, c: Int)) -> [Float32] { func realNHWC(dim: (n: Int, h: Int, w: Int, c: Int)) -> [Float32] {
// print("origin dim: \(dim)") // print("origin dim: \(dim)")
// print("texture: ") // print("texture: ")
// print(self) // print(self)
var textureArray: [Float32] var textureArray: [Float32]
if pixelFormat == .rgba32Float { if pixelFormat == .rgba32Float {
......
...@@ -89,135 +89,135 @@ public class PaddleMobileUnitTest { ...@@ -89,135 +89,135 @@ public class PaddleMobileUnitTest {
} }
public func testConcat() { public func testConcat() {
// let buffer = queue.makeCommandBuffer() ?! "buffer is nil" // let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
// var it: [[Float32]] = [] // var it: [[Float32]] = []
// for _ in 0..<7 { // for _ in 0..<7 {
// it.append((0..<12).map { Float32($0) }) // it.append((0..<12).map { Float32($0) })
// } // }
// let input = it.map { device.tensor2texture(value: $0, dim: [3, 4]) } // let input = it.map { device.tensor2texture(value: $0, dim: [3, 4]) }
// let output = device.tensor2texture(value: [Float32](), dim: [3, 28]) // let output = device.tensor2texture(value: [Float32](), dim: [3, 28])
// //
// let param = ConcatTestParam.init( // let param = ConcatTestParam.init(
// input: input, // input: input,
// output: output, // output: output,
// dims: [[3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4]], // dims: [[3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4]],
// axis: 1, // axis: 1,
// odim: [3, 28] // odim: [3, 28]
// ) // )
// let concatKernel = ConcatKernel<Float32>.init(device: device, testParam: param) // let concatKernel = ConcatKernel<Float32>.init(device: device, testParam: param)
// concatKernel.test(cmdBuffer: buffer, param: param) // concatKernel.test(cmdBuffer: buffer, param: param)
// buffer.addCompletedHandler { (buffer) in // buffer.addCompletedHandler { (buffer) in
// for i in 0..<it.count { // for i in 0..<it.count {
// let _: Float32? = input[i].logDesc() // let _: Float32? = input[i].logDesc()
// self.tensorPrint(tensor: it[i], dim: [3, 4]) // self.tensorPrint(tensor: it[i], dim: [3, 4])
// } // }
// let _: Float32? = output.logDesc() // let _: Float32? = output.logDesc()
// let tx: [Float32] = self.device.texture2tensor(texture: output, dim: [3, 28]) // let tx: [Float32] = self.device.texture2tensor(texture: output, dim: [3, 28])
// self.tensorPrint(tensor: tx, dim: [3, 28]) // self.tensorPrint(tensor: tx, dim: [3, 28])
// } // }
// //
// buffer.commit() // buffer.commit()
} }
public func testReshape() { public func testReshape() {
// let buffer = queue.makeCommandBuffer() ?! "buffer is nil" // let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
// let input: [Float32] = (0..<24).map { Float32($0) } // let input: [Float32] = (0..<24).map { Float32($0) }
// let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4]) // let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
// let outTexture = device.tensor2texture(value: [Float32](), dim: [4, 6]) // let outTexture = device.tensor2texture(value: [Float32](), dim: [4, 6])
// let mp = ReshapeMetalParam.init( // let mp = ReshapeMetalParam.init(
// idim: (1, 2, 3, 4), // idim: (1, 2, 3, 4),
// itrans: (0, 1, 2, 3), // itrans: (0, 1, 2, 3),
// odim: (1, 1, 4, 6), // odim: (1, 1, 4, 6),
// otrans: (0, 1, 2, 3) // otrans: (0, 1, 2, 3)
// ) // )
// let param = ReshapeTestParam.init( // let param = ReshapeTestParam.init(
// inputTexture: inTexture, // inputTexture: inTexture,
// outputTexture: outTexture, // outputTexture: outTexture,
// param: mp // param: mp
// ) // )
// let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param) // let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param)
// reshapeKernel.test(commandBuffer: buffer, testParam: param) // reshapeKernel.test(commandBuffer: buffer, testParam: param)
// buffer.addCompletedHandler { (buffer) in // buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inTexture.logDesc() // let _: Float32? = inTexture.logDesc()
// let _: Float32? = outTexture.logDesc() // let _: Float32? = outTexture.logDesc()
// self.tensorPrint(tensor: input, dim: [2, 3, 4]) // self.tensorPrint(tensor: input, dim: [2, 3, 4])
// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [4, 6]) // let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [4, 6])
// self.tensorPrint(tensor: tx, dim: [4, 6]) // self.tensorPrint(tensor: tx, dim: [4, 6])
// } // }
// let input: [Float32] = (0..<24).map { Float32($0) } // let input: [Float32] = (0..<24).map { Float32($0) }
// let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4]) // let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
// let outTexture = device.tensor2texture(value: [Float32](), dim: [24]) // let outTexture = device.tensor2texture(value: [Float32](), dim: [24])
// let mp = ReshapeMetalParam.init( // let mp = ReshapeMetalParam.init(
// idim: (1, 2, 3, 4), // idim: (1, 2, 3, 4),
// itrans: (0, 1, 2, 3), // itrans: (0, 1, 2, 3),
// odim: (1, 1, 1, 24), // odim: (1, 1, 1, 24),
// otrans: (0, 1, 2, 3) // otrans: (0, 1, 2, 3)
// ) // )
// let param = ReshapeTestParam.init( // let param = ReshapeTestParam.init(
// inputTexture: inTexture, // inputTexture: inTexture,
// outputTexture: outTexture, // outputTexture: outTexture,
// param: mp // param: mp
// ) // )
// let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param) // let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param)
// reshapeKernel.test(commandBuffer: buffer, testParam: param) // reshapeKernel.test(commandBuffer: buffer, testParam: param)
// buffer.addCompletedHandler { (buffer) in // buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inTexture.logDesc() // let _: Float32? = inTexture.logDesc()
// let _: Float32? = outTexture.logDesc() // let _: Float32? = outTexture.logDesc()
// self.tensorPrint(tensor: input, dim: [2, 3, 4]) // self.tensorPrint(tensor: input, dim: [2, 3, 4])
// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [24]) // let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [24])
// self.tensorPrint(tensor: tx, dim: [24]) // self.tensorPrint(tensor: tx, dim: [24])
// } // }
// //
// //
// buffer.commit() // buffer.commit()
} }
public func testTranspose() { public func testTranspose() {
let buffer = queue.makeCommandBuffer() ?! "buffer is nil" let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
// var input: [Float32] = [] // var input: [Float32] = []
// for i in 0..<72 { // for i in 0..<72 {
// input.append(Float32(i)) // input.append(Float32(i))
// } // }
//// let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3) //// let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3)
// let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]); // let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]);
// // group 1 // // group 1
// let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4]) // let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4])
// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0]) // let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0])
//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1]) //// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1])
//// // group 2 //// // group 2
//// let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6) //// let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6)
//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1]) //// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1])
//// ////
// let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param) // let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
// //
// transposeKernel.test(commandBuffer: buffer, param: param) // transposeKernel.test(commandBuffer: buffer, param: param)
// //
// buffer.addCompletedHandler { (buffer) in // buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false) // let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
// let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false) // let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
// self.tensorPrint(tensor: input, dim: [4, 3, 2, 3]) // self.tensorPrint(tensor: input, dim: [4, 3, 2, 3])
// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4]) // let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4])
// self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4]) // self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4])
// } // }
// //
// let input: [Float32] = (0..<24).map { Float32($0) } // let input: [Float32] = (0..<24).map { Float32($0) }
// let inputTexture = device.tensor2texture(value: input, dim: [2, 3, 4]) // let inputTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
// let outputTexture = device.tensor2texture(value: [Float](), dim: [3, 4, 2]) // let outputTexture = device.tensor2texture(value: [Float](), dim: [3, 4, 2])
// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [0, 2, 3, 1]) // let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [0, 2, 3, 1])
// let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param) // let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
// //
// transposeKernel.test(commandBuffer: buffer, param: param) // transposeKernel.test(commandBuffer: buffer, param: param)
// //
// buffer.addCompletedHandler { (buffer) in // buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false) // let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
// let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false) // let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
// self.tensorPrint(tensor: input, dim: [2, 3, 4]) // self.tensorPrint(tensor: input, dim: [2, 3, 4])
// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 4, 2]) // let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 4, 2])
// self.tensorPrint(tensor: tx, dim: [3, 4, 2]) // self.tensorPrint(tensor: tx, dim: [3, 4, 2])
// } // }
// //
buffer.commit() buffer.commit()
} }
......
...@@ -280,12 +280,12 @@ public class FetchHolder: Variant { ...@@ -280,12 +280,12 @@ public class FetchHolder: Variant {
extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible { extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible {
public var description: String { public var description: String {
fatalError() fatalError()
// return "\(result)" // return "\(result)"
} }
public var debugDescription: String { public var debugDescription: String {
fatalError() fatalError()
// return "\(result)" // return "\(result)"
} }
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
import Foundation import Foundation
let testTo = 5 let testTo = 5
var isTest = false var isTest = false
...@@ -113,18 +112,24 @@ public class Executor<P: PrecisionType> { ...@@ -113,18 +112,24 @@ public class Executor<P: PrecisionType> {
//将输入写进文件 //将输入写进文件
/* /*
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(dim) print(dim)
writeToLibrary(fileName: "test_image_super", array: inputArr) writeToLibrary(fileName: "yolo_input", array: inputArr)
print(" write done ") print(" write done ")
return return
*/ */
/* 输出 op 计算结果
for op in SSelf.ops {
//输出 op 计算结果
if GlobalConfig.shared.debug {
for i in 0..<SSelf.ops.count {
print("第 \(i) 个 op: " )
let op = SSelf.ops[i]
op.delogOutput() op.delogOutput()
} }
*/ }
var resultHolder: GPUResultHolder var resultHolder: GPUResultHolder
if except > 0 { if except > 0 {
......
...@@ -155,7 +155,7 @@ public class Loader<P: PrecisionType> { ...@@ -155,7 +155,7 @@ public class Loader<P: PrecisionType> {
let originProgramDesc = PMProgramDesc.init(protoProgram: protoProgram) let originProgramDesc = PMProgramDesc.init(protoProgram: protoProgram)
let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc) let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
// let programDesc = PMProgramDesc.init(protoProgram: protoProgram) // let programDesc = PMProgramDesc.init(protoProgram: protoProgram)
print(programDesc) print(programDesc)
...@@ -221,7 +221,7 @@ public class Loader<P: PrecisionType> { ...@@ -221,7 +221,7 @@ public class Loader<P: PrecisionType> {
} }
} else { } else {
if varDesc.name == fetchKey { if varDesc.name == fetchKey {
// scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inCapacity: <#Int#>, inElapsedTime: 0.0) // scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inCapacity: <#Int#>, inElapsedTime: 0.0)
} else if varDesc.name == feedKey { } else if varDesc.name == feedKey {
} }
} }
......
...@@ -97,7 +97,7 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -97,7 +97,7 @@ class Tensor<P: PrecisionType>: Tensorial {
func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, padWhenOneC: Bool = false, convertToNHWC: Bool = true, withTranspose: Bool = false) { func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, padWhenOneC: Bool = false, convertToNHWC: Bool = true, withTranspose: Bool = false) {
if convertToNHWC { if convertToNHWC {
// print(layout) // print(layout)
convert(to: DataLayout.NHWC()) convert(to: DataLayout.NHWC())
} }
......
...@@ -46,17 +46,17 @@ extension InputTexture { ...@@ -46,17 +46,17 @@ extension InputTexture {
.height = b .height = b
.len = a * d + 3 / 4 .len = a * d + 3 / 4
低于 4 维的 tensor,transpose 必须为 [0, 1, 2, 3] 既不考虑 transpose 低于 4 维的 tensor,transpose 必须为 [0, 1, 2, 3] 既不考虑 transpose
// TODO transpose 对于低维 tensor 的扩展原则。。。 // TODO transpose 对于低维 tensor 的扩展原则。。。
// [a, b] -> [1, 1, a, b] transpose 必须为 [0, 1, x, x] // [a, b] -> [1, 1, a, b] transpose 必须为 [0, 1, x, x]
// [a] -> [1, 1, 1, a] transpose 必须为 [0, 1, 2, 3] // [a] -> [1, 1, 1, a] transpose 必须为 [0, 1, 2, 3]
// [a, b, c] -> [1, a, b, c] tranpose 必须为 [0, x, x, x] // [a, b, c] -> [1, a, b, c] tranpose 必须为 [0, x, x, x]
3 维 tensor [a, b, c] 对应的 texture_2darray, 3 维 tensor [a, b, c] 对应的 texture_2darray,
.width = c .width = c
.height = b .height = b
.len = a + 3 / 4 .len = a + 3 / 4
2 维 tensor [a, b] 对应的 texture_2darray 2 维 tensor [a, b] 对应的 texture_2darray
.width = b + 3 / 4 .width = b + 3 / 4
......
...@@ -45,7 +45,7 @@ extension Runable where Self: OperatorProtocol{ ...@@ -45,7 +45,7 @@ extension Runable where Self: OperatorProtocol{
} }
func inputVariant() -> [String : [MTLBuffer]] { func inputVariant() -> [String : [MTLBuffer]] {
// return [:] // return [:]
fatalError(" op \(type) need implement inputVariant") fatalError(" op \(type) need implement inputVariant")
} }
...@@ -202,4 +202,4 @@ let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Out ...@@ -202,4 +202,4 @@ let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Out
gConvAddAddPreluType : (inputs: ["Input"], outputs: ["Out"]), gConvAddAddPreluType : (inputs: ["Input"], outputs: ["Out"]),
gElementwiseAddPreluType : (inputs: ["X"], outputs: ["Out"]), gElementwiseAddPreluType : (inputs: ["X"], outputs: ["Out"]),
gFusionConvAddType : (inputs: ["Input"], outputs: ["Out"]) gFusionConvAddType : (inputs: ["Input"], outputs: ["Out"])
] ]
...@@ -56,7 +56,7 @@ class BilinearInterpOp<P: PrecisionType>: Operator<BilinearInterpKernel<P>, Bili ...@@ -56,7 +56,7 @@ class BilinearInterpOp<P: PrecisionType>: Operator<BilinearInterpKernel<P>, Bili
print(" \(type) output: ") print(" \(type) output: ")
let device = para.output.metalTexture!.device let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
// print(outputArray) // print(outputArray)
print(outputArray.strideArray()) print(outputArray.strideArray())
} }
......
...@@ -93,22 +93,22 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, ...@@ -93,22 +93,22 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
} }
func delogOutput() { func delogOutput() {
// print("op \(type): ") // print("op \(type): ")
// print(" padding: ") // print(" padding: ")
// print(para.paddings) // print(para.paddings)
// print("stride: ") // print("stride: ")
// print(para.stride) // print(para.stride)
// print("dilations: ") // print("dilations: ")
// print(para.dilations) // print(para.dilations)
// print(" para input dim: ") // print(" para input dim: ")
// print(para.input.dim) // print(para.input.dim)
// print(" para filter dim: ") // print(" para filter dim: ")
// print(para.filter.dim) // print(para.filter.dim)
// print(" para output dim: ") // print(" para output dim: ")
// print(para.output.dim) // print(para.output.dim)
// print(" biase: ") // print(" biase: ")
// let biase: [Float32] = para.y.buffer.array() // let biase: [Float32] = para.y.buffer.array()
// print(biase) // print(biase)
print(" \(type) output: ") print(" \(type) output: ")
print(para.output.metalTexture) print(para.output.metalTexture)
......
...@@ -35,16 +35,16 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam { ...@@ -35,16 +35,16 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam {
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: GlobalConfig.shared.computePrecision) inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: GlobalConfig.shared.computePrecision)
} }
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) { // required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) // param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
// if computePrecision == .Float32 { // if computePrecision == .Float32 {
// super.init(device: device, inFunctionName: "elementwise_add") // super.init(device: device, inFunctionName: "elementwise_add")
// } else if computePrecision == .Float16 { // } else if computePrecision == .Float16 {
// super.init(device: device, inFunctionName: "elementwise_add_half") // super.init(device: device, inFunctionName: "elementwise_add_half")
// } else { // } else {
// fatalError() // fatalError()
// } // }
// } // }
var offset = axis var offset = axis
if axis == -1 { if axis == -1 {
...@@ -65,7 +65,7 @@ class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, Elem ...@@ -65,7 +65,7 @@ class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, Elem
typealias OpType = ElementwiseAddOp<P> typealias OpType = ElementwiseAddOp<P>
func inferShape() { func inferShape() {
// para.output.dim = para.input.dim // para.output.dim = para.input.dim
} }
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册