提交 38acdb9d 编写于 作者: L liuruilong

update

上级 b068788c
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="4MS-jc-i6A"> <document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait"> <device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/> <adaptation id="fullscreen"/>
</device> </device>
...@@ -303,7 +303,7 @@ ...@@ -303,7 +303,7 @@
</viewController> </viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="hGb-Pb-icS" userLabel="First Responder" sceneMemberID="firstResponder"/> <placeholder placeholderIdentifier="IBFirstResponder" id="hGb-Pb-icS" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects> </objects>
<point key="canvasLocation" x="-514" y="-3"/> <point key="canvasLocation" x="-721" y="-427"/>
</scene> </scene>
</scenes> </scenes>
<resources> <resources>
......
...@@ -20,12 +20,6 @@ class MultiPredictViewController: UIViewController { ...@@ -20,12 +20,6 @@ class MultiPredictViewController: UIViewController {
let queue2 = MetalHelper.shared.device.makeCommandQueue() let queue2 = MetalHelper.shared.device.makeCommandQueue()
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
} }
@IBAction func predictAct(_ sender: Any) { @IBAction func predictAct(_ sender: Any) {
......
...@@ -169,6 +169,8 @@ public class VideoCapture: NSObject { ...@@ -169,6 +169,8 @@ public class VideoCapture: NSObject {
} }
} }
@available(iOS 10.0, *)
extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// Because lowering the capture device's FPS looks ugly in the preview, // Because lowering the capture device's FPS looks ugly in the preview,
...@@ -191,6 +193,7 @@ extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { ...@@ -191,6 +193,7 @@ extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {
} }
} }
@available(iOS 10.0, *)
extension VideoCapture: AVCapturePhotoCaptureDelegate { extension VideoCapture: AVCapturePhotoCaptureDelegate {
public func photoOutput(_ captureOutput: AVCapturePhotoOutput, public func photoOutput(_ captureOutput: AVCapturePhotoOutput,
didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?, didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?,
......
...@@ -50,7 +50,7 @@ class ViewController: UIViewController { ...@@ -50,7 +50,7 @@ class ViewController: UIViewController {
@IBOutlet weak var modelPickerView: UIPickerView! @IBOutlet weak var modelPickerView: UIPickerView!
@IBOutlet weak var threadPickerView: UIPickerView! @IBOutlet weak var threadPickerView: UIPickerView!
@IBOutlet weak var videoView: UIView! @IBOutlet weak var videoView: UIView!
var videoCapture: VideoCapture! // var videoCapture: VideoCapture!
var selectImage: UIImage? var selectImage: UIImage?
var inputPointer: UnsafeMutablePointer<Float32>? var inputPointer: UnsafeMutablePointer<Float32>?
...@@ -106,6 +106,12 @@ class ViewController: UIViewController { ...@@ -106,6 +106,12 @@ class ViewController: UIViewController {
return return
} }
// for _ in 0..<10{
// runner.predict(texture: inTexture) { (success, resultHolder) in
// resultHolder?.releasePointer()
// }
// }
let startDate = Date.init() let startDate = Date.init()
for i in 0..<max { for i in 0..<max {
runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
...@@ -279,17 +285,18 @@ extension ViewController: VideoCaptureDelegate{ ...@@ -279,17 +285,18 @@ extension ViewController: VideoCaptureDelegate{
} }
func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) { // @available(iOS 10.0, *)
// if !bool1 { // func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) {
// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) { //// if !bool1 {
self.predictTexture(texture: texture!) //// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) {
// } // self.predictTexture(texture: texture!)
//// }
//
// bool1 = true //
// } //// bool1 = true
//// }
} //
// }
} }
......
...@@ -70,7 +70,7 @@ ...@@ -70,7 +70,7 @@
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; }; FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; }; FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */; }; FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */; };
FC4FD9792140E4980073E130 /* PaddleMobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobile.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; }; FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; };
FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97D2140F2C30073E130 /* libstdc++.tbd */; }; FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97D2140F2C30073E130 /* libstdc++.tbd */; };
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; }; FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
...@@ -122,6 +122,12 @@ ...@@ -122,6 +122,12 @@
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; }; FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; };
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; }; FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; };
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; }; FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; };
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */; };
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */; };
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */; };
FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */; };
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */; };
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */; };
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B6214F869000B520C3 /* Net.swift */; }; FCE9D7B7214F869000B520C3 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B6214F869000B520C3 /* Net.swift */; };
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; }; FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; }; FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
...@@ -199,7 +205,7 @@ ...@@ -199,7 +205,7 @@
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; }; FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; }; FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = "<group>"; }; FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = "<group>"; };
FC4FD9772140E4980073E130 /* PaddleMobile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobile.h; sourceTree = "<group>"; }; FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileCPU.h; sourceTree = "<group>"; };
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = "<group>"; }; FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = "<group>"; };
FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; }; FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; }; FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
...@@ -251,6 +257,12 @@ ...@@ -251,6 +257,12 @@
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = "<group>"; }; FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = "<group>"; };
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = "<group>"; }; FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = "<group>"; };
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = "<group>"; }; FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = "<group>"; };
FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluOp.swift; sourceTree = "<group>"; };
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluKernel.swift; sourceTree = "<group>"; };
FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluOp.swift; sourceTree = "<group>"; };
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluKernel.swift; sourceTree = "<group>"; };
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.inc.metal; sourceTree = "<group>"; };
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.metal; sourceTree = "<group>"; };
FCE9D7B6214F869000B520C3 /* Net.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = "<group>"; }; FCE9D7B6214F869000B520C3 /* Net.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = "<group>"; };
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; }; FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; }; FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
...@@ -390,6 +402,8 @@ ...@@ -390,6 +402,8 @@
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */, FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */,
FCEB684B212F093800D2448E /* PreluOp.swift */, FCEB684B212F093800D2448E /* PreluOp.swift */,
FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */, FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */,
FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */,
FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */,
); );
path = Operators; path = Operators;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -439,6 +453,8 @@ ...@@ -439,6 +453,8 @@
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */, FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */,
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */, FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */,
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */, FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */,
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */,
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */,
); );
path = Kernels; path = Kernels;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -447,7 +463,7 @@ ...@@ -447,7 +463,7 @@
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */, FC4FD9782140E4980073E130 /* libpaddle-mobile.a */,
FC4FD9772140E4980073E130 /* PaddleMobile.h */, FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */,
); );
path = CPU; path = CPU;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -506,6 +522,8 @@ ...@@ -506,6 +522,8 @@
FC803BC6214CBA820094B8E5 /* Macro.metal */, FC803BC6214CBA820094B8E5 /* Macro.metal */,
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */, FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */,
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */, FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */,
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */,
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */,
); );
path = metal; path = metal;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -517,7 +535,7 @@ ...@@ -517,7 +535,7 @@
isa = PBXHeadersBuildPhase; isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC4FD9792140E4980073E130 /* PaddleMobile.h in Headers */, FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */,
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */, FC292C85214257CB00CF622F /* CPUCompute.h in Headers */,
FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */, FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */,
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */, 4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */,
...@@ -632,8 +650,10 @@ ...@@ -632,8 +650,10 @@
FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */, FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */, 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */,
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */,
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */, FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */, FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */, FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */, FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
...@@ -668,6 +688,7 @@ ...@@ -668,6 +688,7 @@
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */, FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */,
FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */, FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
FC9D038420E23B01000F735A /* Texture.swift in Sources */, FC9D038420E23B01000F735A /* Texture.swift in Sources */,
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */,
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */, 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */, 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */, FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
...@@ -700,6 +721,7 @@ ...@@ -700,6 +721,7 @@
4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */, 4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */,
FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */, FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */,
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */, FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */,
FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */,
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */, FCE9D7B7214F869000B520C3 /* Net.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */, FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */, FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
...@@ -718,7 +740,9 @@ ...@@ -718,7 +740,9 @@
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */, FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */,
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */, FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */,
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */, FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */, FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */,
4AA1EA90214664CD00D0F791 /* Split.metal in Sources */, 4AA1EA90214664CD00D0F791 /* Split.metal in Sources */,
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
</AdditionalOptions> </AdditionalOptions>
</TestAction> </TestAction>
<LaunchAction <LaunchAction
buildConfiguration = "Debug" buildConfiguration = "Release"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0" launchStyle = "0"
......
...@@ -90,32 +90,32 @@ public class MobileNet_ssd_AR: Net{ ...@@ -90,32 +90,32 @@ public class MobileNet_ssd_AR: Net{
let v = program.scope[output]! let v = program.scope[output]!
let originTexture = v as! Texture<Float32> let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7]) originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7]) originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7]) originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
program.scope[output] = originTexture program.scope[output] = originTexture
if i == 99 { if i == 99 {
opDesc.attrs["axis"] = 0 opDesc.attrs["axis"] = 0
} else { } else {
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
} }
} }
for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] { for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
let opDesc = program.programDesc.blocks[0].ops[i] let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first! let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]! let v = program.scope[output]!
let originTexture = v as! Texture<Float32> let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
} }
for i in [60, 101, 90, 97, 70, 80] { for i in [60, 101, 90, 97, 70, 80] {
let opDesc = program.programDesc.blocks[0].ops[i] let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first! let output = opDesc.outputs["Out"]!.first!
...@@ -124,7 +124,7 @@ public class MobileNet_ssd_AR: Net{ ...@@ -124,7 +124,7 @@ public class MobileNet_ssd_AR: Net{
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
} }
for i in [102] { for i in [102] {
let opDesc = program.programDesc.blocks[0].ops[i] let opDesc = program.programDesc.blocks[0].ops[i]
for output in opDesc.outputs["Out"]! { for output in opDesc.outputs["Out"]! {
......
...@@ -65,7 +65,9 @@ class OpCreator<P: PrecisionType> { ...@@ -65,7 +65,9 @@ class OpCreator<P: PrecisionType> {
gSplit : SplitOp<P>.creat, gSplit : SplitOp<P>.creat,
gShape : ShapeOp<P>.creat, gShape : ShapeOp<P>.creat,
gFlatten : FlattenOp<P>.creat, gFlatten : FlattenOp<P>.creat,
gConvAddPreluType : ConvAddPreluOp<P>.creat] gConvAddPreluType : ConvAddPreluOp<P>.creat,
gConvAddAddPreluType : ConvAddAddPreluOp<P>.creat,
gElementwiseAddPreluType: ElementwiseAddPreluOp<P>.creat]
private init(){} private init(){}
} }
...@@ -157,6 +157,9 @@ let gSplit = "split" ...@@ -157,6 +157,9 @@ let gSplit = "split"
let gShape = "shape" let gShape = "shape"
let gFlatten = "flatten" let gFlatten = "flatten"
let gConvAddPreluType = "conv_add_prelu" let gConvAddPreluType = "conv_add_prelu"
let gConvAddAddPreluType = "conv_add_add_prelu"
let gElementwiseAddPreluType = "elementwise_add_prelu"
let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]), let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]),
gBatchNormType : (inputs: ["X"], outputs: ["Y"]), gBatchNormType : (inputs: ["X"], outputs: ["Y"]),
...@@ -183,6 +186,7 @@ let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Out ...@@ -183,6 +186,7 @@ let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Out
gSplit : (inputs: ["X"], outputs: ["Out"]), gSplit : (inputs: ["X"], outputs: ["Out"]),
gShape : (inputs: ["Input"], outputs: ["Out"]), gShape : (inputs: ["Input"], outputs: ["Out"]),
gFlatten : (inputs: ["X"], outputs: ["Out"]), gFlatten : (inputs: ["X"], outputs: ["Out"]),
gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"]) gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"]),
gConvAddAddPreluType : (inputs: ["Input"], outputs: ["Out"]),
gElementwiseAddPreluType : (inputs: ["X"], outputs: ["Out"])
] ]
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs)
alpha = try ConvAddAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ConvAddAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
y = try ConvAddAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
}
}
let input: Texture<P>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let mode: String
let alpha: Tensor<P>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
}
class ConvAddAddPreluOp<P: PrecisionType>: Operator<ConvAddAddPreluKernel<P>, ConvAddAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddAddPreluOp<P>
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gConvAddAddPreluType
}
static func needCheck() -> [(Int, String)] {
return [(2, "Y"), (2, "X")]
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
}
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
inputX = try ElementwiseAddPreluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ElementwiseAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try ElementwiseAddPreluParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error {
throw error
}
do {
inputY = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch _ {
let tensorY: Tensor<P> = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision)
}
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
// if computePrecision == .Float32 {
// super.init(device: device, inFunctionName: "elementwise_add")
// } else if computePrecision == .Float16 {
// super.init(device: device, inFunctionName: "elementwise_add_half")
// } else {
// fatalError()
// }
// }
var offset = axis
if axis == -1 {
offset = inputX.tensorDim.cout() - inputY.tensorDim.cout()
}
for i in 0..<(inputY.tensorDim.cout()) {
assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i])
}
}
let mode: String
let alpha: Tensor<P>
var inputX: Texture<P>
var inputY: Texture<P>
var output: Texture<P>
var axis: Int
}
class ElementwiseAddPreluOp<P: PrecisionType>: Operator<ElementwiseAddPreluKernel<P>, ElementwiseAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gElementwiseAddType)
_ = beginNode
--> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gElementwiseAddPreluType
}
typealias OpType = ElementwiseAddPreluOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output)
let padToFourDim = para.output.padToFourDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half")
}
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float")
}
} else {
fatalError(" unsupport yet ")
}
} else {
fatalError()
}
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
// print(" function: \(functionName)")
// print("offset x: \(offsetX)")
// print("offset y: \(offsetY)")
let offsetZ = 0.0
let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
// print("metal param: ")
// print(inMetalParam)
metalParam = inMetalParam
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddAddPreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
metalParam = ElementwiseAddMetalParam.init()
let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
if param.axis == -1 {
metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout())
} else {
metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
}
metalParam.ylen = Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
// print("===> elementwise_add fast!!!")
metalParam.fast = 1
}
if computePrecision == .Float32 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_element_float")
} else {
super.init(device: device, inFunctionName: "elementwise_add_prelu_float")
}
} else if computePrecision == .Float16 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
} else {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
}
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddPreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.inputX.metalTexture, index: 0)
encoder.setTexture(param.inputY.metalTexture, index: 1)
encoder.setTexture(param.output.metalTexture, index: 2)
encoder.setBytes(&metalParam, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
...@@ -40,7 +40,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text ...@@ -40,7 +40,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
uint input_arr_size = inTexture.get_array_size(); uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); float4 output = biase[gid.z];
float4 input; float4 input;
for (uint i = 0; i < input_arr_size; ++i) { for (uint i = 0; i < input_arr_size; ++i) {
...@@ -57,7 +57,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text ...@@ -57,7 +57,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w); output.w += dot(input, weight_w);
} }
output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -85,7 +85,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text ...@@ -85,7 +85,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); float4 output = biase[gid.z];
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
...@@ -125,7 +125,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text ...@@ -125,7 +125,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -153,7 +153,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text ...@@ -153,7 +153,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); float4 output = biase[gid.z];
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
float4 input[5]; float4 input[5];
...@@ -183,7 +183,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text ...@@ -183,7 +183,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -212,7 +212,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text ...@@ -212,7 +212,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); float4 output = biase[gid.z];
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
float4 input[5]; float4 input[5];
...@@ -242,7 +242,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text ...@@ -242,7 +242,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -265,7 +265,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText ...@@ -265,7 +265,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9; const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4; uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0); float4 output = biase[gid.z];
float4 inputs[9]; float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
...@@ -283,7 +283,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText ...@@ -283,7 +283,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText
output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
} }
output = output + biase[gid.z]; // output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -312,7 +312,7 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -312,7 +312,7 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size(); uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); half4 output = biase[gid.z];
half4 input; half4 input;
for (uint i = 0; i < input_arr_size; ++i) { for (uint i = 0; i < input_arr_size; ++i) {
...@@ -329,8 +329,8 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -329,8 +329,8 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w); output.w += dot(input, weight_w);
} }
output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]], kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
...@@ -354,7 +354,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -354,7 +354,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size(); uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); half4 output = biase[gid.z];
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
...@@ -384,8 +384,8 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -384,8 +384,8 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
output.w += dot(float4(input[j]), float4(weight_w)); output.w += dot(float4(input[j]), float4(weight_w));
} }
} }
output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]], kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
...@@ -406,7 +406,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in ...@@ -406,7 +406,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9; const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4; uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0); half4 output = biase[gid.z];
half4 inputs[9]; half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
...@@ -419,13 +419,13 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in ...@@ -419,13 +419,13 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) { for (int j = 0; j < 9; ++j) {
half4 input = inputs[j]; half4 input = inputs[j];
output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]); output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]); output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]); output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
} }
output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -453,7 +453,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -453,7 +453,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); half4 output = biase[gid.z];
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
half4 input[5]; half4 input[5];
...@@ -480,11 +480,11 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -480,11 +480,11 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
output.z += dot(input[j], weight_z); output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w)); output.w += dot(input[j], weight_w);
} }
} }
output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
...@@ -512,7 +512,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -512,7 +512,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); half4 output = biase[gid.z];
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
half4 input[5]; half4 input[5];
...@@ -542,8 +542,8 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -542,8 +542,8 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
......
...@@ -49,7 +49,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -49,7 +49,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint input_arr_size = inTexture.get_array_size(); uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) input; VECTOR(P, 4) input;
for (uint i = 0; i < input_arr_size; ++i) { for (uint i = 0; i < input_arr_size; ++i) {
...@@ -67,7 +67,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -67,7 +67,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl
output.w += dot(input, weight_w); output.w += dot(input, weight_w);
} }
output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL #ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z]; VECTOR(P, 4) alpha_value = alpha[gid.z];
...@@ -126,7 +126,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -126,7 +126,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0); VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
...@@ -166,7 +166,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -166,7 +166,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl
output.w += dot(input[j], weight_w); output.w += dot(input[j], weight_w);
} }
} }
output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL #ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z]; VECTOR(P, 4) alpha_value = alpha[gid.z];
...@@ -226,7 +226,7 @@ kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -226,7 +226,7 @@ kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(biase[gid.z]);; VECTOR(P, 4) output = biase[gid.z];;
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[5]; VECTOR(P, 4) input[5];
...@@ -316,7 +316,7 @@ kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array<P, access::sampl ...@@ -316,7 +316,7 @@ kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(biase[gid.z]); VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
VECTOR(P, 4) input[5]; VECTOR(P, 4) input[5];
...@@ -399,7 +399,7 @@ kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, acc ...@@ -399,7 +399,7 @@ kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, acc
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9; const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4; uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(biase[gid.z]); VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) inputs[9]; VECTOR(P, 4) inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
texture2d_array<P, access::read> inputY [[texture(1)]],
texture2d_array<P, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(1)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
VECTOR(P, 4) rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
VECTOR(P, 4) output = rx + ry;
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(output, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
...@@ -69,6 +69,7 @@ class Node { ...@@ -69,6 +69,7 @@ class Node {
func to(depth: UInt) -> Node { func to(depth: UInt) -> Node {
let beginNode = Node.init(inType: type) let beginNode = Node.init(inType: type)
beginNode.opDesc = opDesc
to(depth: depth - 1, withNode: beginNode) to(depth: depth - 1, withNode: beginNode)
return beginNode return beginNode
} }
...@@ -130,6 +131,7 @@ class Node { ...@@ -130,6 +131,7 @@ class Node {
for output in outputs { for output in outputs {
let node = Node.init(inType: output.type) let node = Node.init(inType: output.type)
node.opDesc = output.opDesc
withNode.outputs.append(node) withNode.outputs.append(node)
output.to(depth: depth - 1, withNode: node) output.to(depth: depth - 1, withNode: node)
} }
...@@ -182,10 +184,12 @@ extension Node: Equatable { ...@@ -182,10 +184,12 @@ extension Node: Equatable {
class ProgramOptimize<P: PrecisionType> { class ProgramOptimize<P: PrecisionType> {
// register fusion // register fusion
let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self, let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self,
// ConvAddAddPreluOp<P>.self,
ConvAddPreluOp<P>.self, ConvAddPreluOp<P>.self,
ConvAddOp<P>.self, ConvAddOp<P>.self,
ConvBNReluOp<P>.self, ConvBNReluOp<P>.self,
DwConvBNReluOp<P>.self DwConvBNReluOp<P>.self,
// ElementwiseAddPreluOp<P>.self
] ]
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
...@@ -256,6 +260,15 @@ class ProgramOptimize<P: PrecisionType> { ...@@ -256,6 +260,15 @@ class ProgramOptimize<P: PrecisionType> {
} }
} }
} }
let paramInputToChecks = checkNode.opDesc?.paraInputs[toCheck.1] ?? []
for paramInputToCheck in paramInputToChecks {
if node.output[paramInputToCheck] == nil {
if relationshipMap[paramInputToCheck] == nil {
canFolder = false
}
}
}
} }
} }
......
...@@ -83,7 +83,7 @@ public class Executor<P: PrecisionType> { ...@@ -83,7 +83,7 @@ public class Executor<P: PrecisionType> {
for block in inProgram.programDesc.blocks { for block in inProgram.programDesc.blocks {
//block.ops.count //block.ops.count
for i in 0..<block.ops.count { for i in 0..<block.ops.count {
let op = block.ops[i] let opDesc = block.ops[i]
do { do {
// print("in for i \(i): ") // print("in for i \(i): ")
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"]) // print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
...@@ -93,7 +93,7 @@ public class Executor<P: PrecisionType> { ...@@ -93,7 +93,7 @@ public class Executor<P: PrecisionType> {
// //
// } // }
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope)
ops.append(op) ops.append(op)
} catch let error { } catch let error {
throw error throw error
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#pragma once #pragma once
#import "PaddleMobile.h" #import "PaddleMobileCPU.h"
#import "CPUCompute.h" #import "CPUCompute.h"
#import "PaddleMobileGPU.h" #import "PaddleMobileGPU.h"
#import <UIKit/UIKit.h> #import <UIKit/UIKit.h>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册