未验证 提交 13f2079a 编写于 作者: J Jiaying Zhao 提交者: GitHub

Merge branch 'develop' into develop

...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
*.lai *.lai
*.la *.la
*.lib *.lib
*.a
# Executables # Executables
*.exe *.exe
...@@ -70,7 +71,10 @@ build ...@@ -70,7 +71,10 @@ build
cmake-build-debug cmake-build-debug
cmake-build-release cmake-build-release
#ios demo # ios
tools/libomp.a
# ios demo
demo/ios/PaddleMobileDemo/PaddleMobileDemo/googlenet_combine/ demo/ios/PaddleMobileDemo/PaddleMobileDemo/googlenet_combine/
demo/ios/PaddleMobileDemo/PaddleMobileDemo/*.jpg demo/ios/PaddleMobileDemo/PaddleMobileDemo/*.jpg
demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/*.a demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/*.a
...@@ -84,6 +88,7 @@ SwiftProtobuf.framework ...@@ -84,6 +88,7 @@ SwiftProtobuf.framework
paddle-mobile.xcworkspace paddle-mobile.xcworkspace
metal/models/ metal/models/
metal/images/ metal/images/
*.a
metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a
tools/libomp.a *.xcuserdatad/
\ No newline at end of file */xcuserdata/
...@@ -69,8 +69,18 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平 ...@@ -69,8 +69,18 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平
- **苹果设备的GPU Metal实现** - **苹果设备的GPU Metal实现**
基于Metal实现的苹果设备的GPU预测库,也已经在实现中,近期也会有相应可运行版本。 |mobilenetfssd|速度|
|------------|-----|
|A9(ms)|33.78|
|A10(ms)|24.05|
|A11(ms)|17.15|
|||
|genet|速度|
|A9(ms) |3.49|
|A10(ms)|2.54|
|A11(ms)|1.43|
- **FPGA** - **FPGA**
FPGA实现正在进行中,是基于Xilinx的ZU5目标开发板。 FPGA实现正在进行中,是基于Xilinx的ZU5目标开发板。
......
...@@ -8,22 +8,29 @@ ...@@ -8,22 +8,29 @@
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; }; 30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; };
C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = C2E67E5D21524E460013F575 /* LoadPointerViewController.m */; };
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; }; FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; };
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; }; FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; };
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; }; FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; };
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; }; FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; };
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; }; FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; };
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; }; FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; };
FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602C72108580600FACB58 /* MetalHelper.swift */; }; FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; };
FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC918190211DBC3500B6F354 /* paddle-mobile.png */; }; FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; };
FC918193211DC70500B6F354 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC918192211DC70500B6F354 /* iphone.JPG */; }; FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCD04E6320F3146B0007374F /* params in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6120F3146A0007374F /* params */; };
FCD04E6420F3146B0007374F /* model in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6220F3146A0007374F /* model */; };
FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */; };
FCDFD41B211D91C7005AB38B /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD41A211D91C7005AB38B /* synset.txt */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; }; FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCEEE7D3210627A000444BEC /* banana.jpeg */; }; FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; };
FCFE9B692152858600DECA15 /* hand.jpg.zip in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B632152858600DECA15 /* hand.jpg.zip */; };
FCFE9B6A2152858600DECA15 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B642152858600DECA15 /* synset.txt */; };
FCFE9B6B2152858600DECA15 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B652152858600DECA15 /* banana.jpeg */; };
FCFE9B6C2152858600DECA15 /* hand.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B662152858600DECA15 /* hand.jpg */; };
FCFE9B6D2152858600DECA15 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B672152858600DECA15 /* iphone.JPG */; };
FCFE9B6E2152858600DECA15 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B682152858600DECA15 /* paddle-mobile.png */; };
FCFE9C512152859600DECA15 /* genet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B752152859500DECA15 /* genet_params */; };
FCFE9C522152859600DECA15 /* genet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B762152859500DECA15 /* genet_model */; };
FCFE9D232152859600DECA15 /* ar_model in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9C4C2152859500DECA15 /* ar_model */; };
FCFE9D242152859600DECA15 /* ar_params in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9C4D2152859500DECA15 /* ar_params */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */ /* Begin PBXCopyFilesBuildPhase section */
...@@ -44,6 +51,8 @@ ...@@ -44,6 +51,8 @@
081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.release.xcconfig"; sourceTree = "<group>"; }; 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.release.xcconfig"; sourceTree = "<group>"; };
18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_demo.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_demo.framework; sourceTree = BUILT_PRODUCTS_DIR; };
878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = "<group>"; }; 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = "<group>"; };
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LoadPointerViewController.h; sourceTree = "<group>"; };
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = LoadPointerViewController.m; sourceTree = "<group>"; };
FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; }; FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; };
FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; }; FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; };
FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; }; FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
...@@ -52,15 +61,23 @@ ...@@ -52,15 +61,23 @@
FC039B8820E11C560081E9F8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; }; FC039B8820E11C560081E9F8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; }; FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
FC3602C72108580600FACB58 /* MetalHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = MetalHelper.swift; path = "../../paddle-mobile-unit-test/paddle-mobile-unit-test/MetalHelper.swift"; sourceTree = "<group>"; }; FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = "<group>"; };
FC918190211DBC3500B6F354 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; }; FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; };
FC918192211DC70500B6F354 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; }; FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; };
FCD04E6120F3146A0007374F /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; }; FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = "<group>"; };
FCD04E6220F3146A0007374F /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; }; FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelHelper.swift; sourceTree = "<group>"; };
FCDFD41A211D91C7005AB38B /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCEEE7D3210627A000444BEC /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; }; FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; };
FCFE9B632152858600DECA15 /* hand.jpg.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = hand.jpg.zip; sourceTree = "<group>"; };
FCFE9B642152858600DECA15 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FCFE9B652152858600DECA15 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
FCFE9B662152858600DECA15 /* hand.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = hand.jpg; sourceTree = "<group>"; };
FCFE9B672152858600DECA15 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; };
FCFE9B682152858600DECA15 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; };
FCFE9B752152859500DECA15 /* genet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_params; sourceTree = "<group>"; };
FCFE9B762152859500DECA15 /* genet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_model; sourceTree = "<group>"; };
FCFE9C4C2152859500DECA15 /* ar_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = ar_model; sourceTree = "<group>"; };
FCFE9C4D2152859500DECA15 /* ar_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = ar_params; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
...@@ -88,6 +105,7 @@ ...@@ -88,6 +105,7 @@
7B7DED984E9EE7BFB45E24E8 /* Frameworks */ = { 7B7DED984E9EE7BFB45E24E8 /* Frameworks */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC4FD97B2140EE250073E130 /* libc++.tbd */,
18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */, 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */,
); );
name = Frameworks; name = Frameworks;
...@@ -115,49 +133,82 @@ ...@@ -115,49 +133,82 @@
FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = { FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC0E2C2020EDC03B009C1FAC /* models */, FCFE9B6F2152859500DECA15 /* models */,
FC0E2C1D20EDC030009C1FAC /* images */, FCFE9B622152858600DECA15 /* images */,
FC803BCA214D27920094B8E5 /* VideoCapture */,
FC8CFED2213519540094D569 /* Net */,
FC039B8120E11C550081E9F8 /* AppDelegate.swift */, FC039B8120E11C550081E9F8 /* AppDelegate.swift */,
FC013927210204A3008100E3 /* PreProcessKernel.metal */,
FC039B8320E11C550081E9F8 /* ViewController.swift */, FC039B8320E11C550081E9F8 /* ViewController.swift */,
FC039B8520E11C550081E9F8 /* Main.storyboard */, FC039B8520E11C550081E9F8 /* Main.storyboard */,
FC039B8820E11C560081E9F8 /* Assets.xcassets */, FC039B8820E11C560081E9F8 /* Assets.xcassets */,
FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */, FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */,
FC039B8D20E11C560081E9F8 /* Info.plist */, FC039B8D20E11C560081E9F8 /* Info.plist */,
FC3602C72108580600FACB58 /* MetalHelper.swift */, FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */,
FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */, FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */,
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */,
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */,
); );
path = "paddle-mobile-demo"; path = "paddle-mobile-demo";
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC0E2C1D20EDC030009C1FAC /* images */ = { FC803BCA214D27920094B8E5 /* VideoCapture */ = {
isa = PBXGroup;
children = (
FC803BCB214D27920094B8E5 /* FPSCounter.swift */,
FC803BCC214D27920094B8E5 /* VideoCapture.swift */,
);
path = VideoCapture;
sourceTree = "<group>";
};
FC8CFED2213519540094D569 /* Net */ = {
isa = PBXGroup;
children = (
FC013927210204A3008100E3 /* PreProcessKernel.metal */,
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */,
);
path = Net;
sourceTree = "<group>";
};
FCFE9B622152858600DECA15 /* images */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC918192211DC70500B6F354 /* iphone.JPG */, FCFE9B632152858600DECA15 /* hand.jpg.zip */,
FC918190211DBC3500B6F354 /* paddle-mobile.png */, FCFE9B642152858600DECA15 /* synset.txt */,
FCDFD41A211D91C7005AB38B /* synset.txt */, FCFE9B652152858600DECA15 /* banana.jpeg */,
FCEEE7D3210627A000444BEC /* banana.jpeg */, FCFE9B662152858600DECA15 /* hand.jpg */,
FCFE9B672152858600DECA15 /* iphone.JPG */,
FCFE9B682152858600DECA15 /* paddle-mobile.png */,
); );
name = images; name = images;
path = ../../images; path = ../../images;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC0E2C2020EDC03B009C1FAC /* models */ = { FCFE9B6F2152859500DECA15 /* models */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCD04E6020F3146A0007374F /* mobilenet */, FCFE9B742152859500DECA15 /* genet */,
FCFE9C4B2152859500DECA15 /* fluid_fssd_new_ar */,
); );
name = models; name = models;
path = ../../models; path = ../../models;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FCD04E6020F3146A0007374F /* mobilenet */ = { FCFE9B742152859500DECA15 /* genet */ = {
isa = PBXGroup;
children = (
FCFE9B752152859500DECA15 /* genet_params */,
FCFE9B762152859500DECA15 /* genet_model */,
);
path = genet;
sourceTree = "<group>";
};
FCFE9C4B2152859500DECA15 /* fluid_fssd_new_ar */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCD04E6120F3146A0007374F /* params */, FCFE9C4C2152859500DECA15 /* ar_model */,
FCD04E6220F3146A0007374F /* model */, FCFE9C4D2152859500DECA15 /* ar_params */,
); );
path = mobilenet; path = fluid_fssd_new_ar;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
/* End PBXGroup section */ /* End PBXGroup section */
...@@ -195,6 +246,7 @@ ...@@ -195,6 +246,7 @@
TargetAttributes = { TargetAttributes = {
FC039B7D20E11C550081E9F8 = { FC039B7D20E11C550081E9F8 = {
CreatedOnToolsVersion = 9.3.1; CreatedOnToolsVersion = 9.3.1;
LastSwiftMigration = 0940;
}; };
}; };
}; };
...@@ -221,14 +273,18 @@ ...@@ -221,14 +273,18 @@
isa = PBXResourcesBuildPhase; isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FCD04E6320F3146B0007374F /* params in Resources */, FCFE9D232152859600DECA15 /* ar_model in Resources */,
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */, FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */, FCFE9C522152859600DECA15 /* genet_model in Resources */,
FCFE9D242152859600DECA15 /* ar_params in Resources */,
FCFE9B6E2152858600DECA15 /* paddle-mobile.png in Resources */,
FCFE9C512152859600DECA15 /* genet_params in Resources */,
FCFE9B692152858600DECA15 /* hand.jpg.zip in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */, FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */, FCFE9B6A2152858600DECA15 /* synset.txt in Resources */,
FC918193211DC70500B6F354 /* iphone.JPG in Resources */, FCFE9B6B2152858600DECA15 /* banana.jpeg in Resources */,
FCDFD41B211D91C7005AB38B /* synset.txt in Resources */, FCFE9B6D2152858600DECA15 /* iphone.JPG in Resources */,
FCD04E6420F3146B0007374F /* model in Resources */, FCFE9B6C2152858600DECA15 /* hand.jpg in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */, FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
...@@ -280,10 +336,13 @@ ...@@ -280,10 +336,13 @@
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */, FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */,
FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */, FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */,
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */, FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */,
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */,
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */,
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */,
C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */,
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */, FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
...@@ -428,19 +487,23 @@ ...@@ -428,19 +487,23 @@
baseConfigurationReference = 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */; baseConfigurationReference = 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */;
buildSettings = { buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CLANG_ENABLE_MODULES = YES;
CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = A798K58VVL; DEVELOPMENT_TEAM = A798K58VVL;
ENABLE_BITCODE = NO;
INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
LD_RUNPATH_SEARCH_PATHS = ( LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)", "$(inherited)",
"@executable_path/Frameworks", "@executable_path/Frameworks",
); );
PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal; PRODUCT_BUNDLE_IDENTIFIER = "com.baidu.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME)"; PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE = ""; PROVISIONING_PROFILE = "";
PROVISIONING_PROFILE_SPECIFIER = ""; PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_OBJC_BRIDGING_HEADER = "paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h";
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
SWIFT_VERSION = 4.0; SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2"; TARGETED_DEVICE_FAMILY = "1,2";
}; };
...@@ -451,19 +514,22 @@ ...@@ -451,19 +514,22 @@
baseConfigurationReference = 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */; baseConfigurationReference = 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */;
buildSettings = { buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CLANG_ENABLE_MODULES = YES;
CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = A798K58VVL; DEVELOPMENT_TEAM = A798K58VVL;
ENABLE_BITCODE = NO;
INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
LD_RUNPATH_SEARCH_PATHS = ( LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)", "$(inherited)",
"@executable_path/Frameworks", "@executable_path/Frameworks",
); );
PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal; PRODUCT_BUNDLE_IDENTIFIER = "com.baidu.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME)"; PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE = ""; PROVISIONING_PROFILE = "";
PROVISIONING_PROFILE_SPECIFIER = ""; PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_OBJC_BRIDGING_HEADER = "paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h";
SWIFT_VERSION = 4.0; SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2"; TARGETED_DEVICE_FAMILY = "1,2";
}; };
......
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "0940"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>paddle-mobile-demo.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>2</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>FC039B7D20E11C550081E9F8</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>
...@@ -19,7 +19,6 @@ class AppDelegate: UIResponder, UIApplicationDelegate { ...@@ -19,7 +19,6 @@ class AppDelegate: UIResponder, UIApplicationDelegate {
var window: UIWindow? var window: UIWindow?
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool { func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
// Override point for customization after application launch. // Override point for customization after application launch.
return true return true
......
...@@ -11,6 +11,34 @@ ...@@ -11,6 +11,34 @@
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/> <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies> </dependencies>
<scenes> <scenes>
<!--Multi Predict View Controller-->
<scene sceneID="ec4-AW-9Vs">
<objects>
<viewController id="Vwd-lt-764" customClass="MultiPredictViewController" customModule="paddle_mobile_demo" customModuleProvider="target" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="55D-rz-Ex6">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="TQt-X9-PdF">
<rect key="frame" x="164" y="318" width="46" height="30"/>
<state key="normal" title="Button"/>
<connections>
<action selector="predictAct:" destination="Vwd-lt-764" eventType="touchUpInside" id="d4z-Cv-6jY"/>
</connections>
</button>
</subviews>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstItem="TQt-X9-PdF" firstAttribute="centerY" secondItem="55D-rz-Ex6" secondAttribute="centerY" id="bL3-wr-TcH"/>
<constraint firstItem="TQt-X9-PdF" firstAttribute="centerX" secondItem="55D-rz-Ex6" secondAttribute="centerX" id="sBi-RQ-sJn"/>
</constraints>
<viewLayoutGuide key="safeArea" id="bsd-h4-RYZ"/>
</view>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="68E-SG-96s" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="-559" y="686"/>
</scene>
<!--View Controller--> <!--View Controller-->
<scene sceneID="tne-QT-ifu"> <scene sceneID="tne-QT-ifu">
<objects> <objects>
...@@ -20,12 +48,11 @@ ...@@ -20,12 +48,11 @@
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/> <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews> <subviews>
<imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="ZZh-fw-LwK"> <imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="ZZh-fw-LwK">
<rect key="frame" x="0.0" y="20" width="375" height="247"/> <rect key="frame" x="0.0" y="20" width="225" height="247"/>
</imageView> </imageView>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Thread:" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="2EB-m2-a3L"> <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" horizontalCompressionResistancePriority="749" text="Platform:" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="2EB-m2-a3L">
<rect key="frame" x="10" y="538" width="68" height="24"/> <rect key="frame" x="10" y="538" width="35" height="24"/>
<constraints> <constraints>
<constraint firstAttribute="width" constant="68" id="Q5J-tq-JSX"/>
<constraint firstAttribute="height" constant="24" id="SYv-As-Si8"/> <constraint firstAttribute="height" constant="24" id="SYv-As-Si8"/>
</constraints> </constraints>
<fontDescription key="fontDescription" type="system" pointSize="20"/> <fontDescription key="fontDescription" type="system" pointSize="20"/>
...@@ -33,12 +60,12 @@ ...@@ -33,12 +60,12 @@
<nil key="highlightedColor"/> <nil key="highlightedColor"/>
</label> </label>
<pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr"> <pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr">
<rect key="frame" x="88" y="510.5" width="287" height="80"/> <rect key="frame" x="55" y="510.5" width="320" height="80"/>
<constraints> <constraints>
<constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/> <constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/>
</constraints> </constraints>
</pickerView> </pickerView>
<pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="6MG-gv-hD5"> <pickerView contentMode="scaleToFill" horizontalCompressionResistancePriority="749" translatesAutoresizingMaskIntoConstraints="NO" id="6MG-gv-hD5">
<rect key="frame" x="85" y="401" width="290" height="80"/> <rect key="frame" x="85" y="401" width="290" height="80"/>
<constraints> <constraints>
<constraint firstAttribute="height" constant="80" id="yAL-JY-G6b"/> <constraint firstAttribute="height" constant="80" id="yAL-JY-G6b"/>
...@@ -47,7 +74,6 @@ ...@@ -47,7 +74,6 @@
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Models" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="avL-VK-Kha"> <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Models" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="avL-VK-Kha">
<rect key="frame" x="10" y="429" width="65" height="24"/> <rect key="frame" x="10" y="429" width="65" height="24"/>
<constraints> <constraints>
<constraint firstAttribute="width" constant="65" id="6oA-g2-Xq4"/>
<constraint firstAttribute="height" constant="24" id="EwE-B3-z2R"/> <constraint firstAttribute="height" constant="24" id="EwE-B3-z2R"/>
</constraints> </constraints>
<fontDescription key="fontDescription" type="system" pointSize="20"/> <fontDescription key="fontDescription" type="system" pointSize="20"/>
...@@ -142,9 +168,14 @@ ...@@ -142,9 +168,14 @@
<fontDescription key="fontDescription" type="system" pointSize="15"/> <fontDescription key="fontDescription" type="system" pointSize="15"/>
<textInputTraits key="textInputTraits" autocapitalizationType="sentences"/> <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
</textView> </textView>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="Cil-py-NiA">
<rect key="frame" x="225" y="20" width="150" height="247"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</view>
</subviews> </subviews>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/> <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<constraints> <constraints>
<constraint firstItem="m5L-O7-P31" firstAttribute="top" secondItem="Cil-py-NiA" secondAttribute="bottom" constant="10" id="16p-IK-b5X"/>
<constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="VQn-bS-fWp" secondAttribute="trailing" constant="10" id="1Xg-0h-9SE"/> <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="VQn-bS-fWp" secondAttribute="trailing" constant="10" id="1Xg-0h-9SE"/>
<constraint firstItem="avL-VK-Kha" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="2t9-hS-VXa"/> <constraint firstItem="avL-VK-Kha" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="2t9-hS-VXa"/>
<constraint firstItem="R90-Yf-S6g" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="76b-Ny-1Og"/> <constraint firstItem="R90-Yf-S6g" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="76b-Ny-1Og"/>
...@@ -159,11 +190,12 @@ ...@@ -159,11 +190,12 @@
<constraint firstItem="XpL-9M-UOp" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="KWW-qT-Rzf"/> <constraint firstItem="XpL-9M-UOp" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="KWW-qT-Rzf"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="centerY" secondItem="avL-VK-Kha" secondAttribute="centerY" id="KZa-YZ-DEs"/> <constraint firstItem="6MG-gv-hD5" firstAttribute="centerY" secondItem="avL-VK-Kha" secondAttribute="centerY" id="KZa-YZ-DEs"/>
<constraint firstItem="2EB-m2-a3L" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="Le3-TN-zOL"/> <constraint firstItem="2EB-m2-a3L" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="Le3-TN-zOL"/>
<constraint firstItem="ZZh-fw-LwK" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="MeS-HQ-voE"/> <constraint firstItem="ZZh-fw-LwK" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" constant="-150" id="MeS-HQ-voE"/>
<constraint firstItem="m5L-O7-P31" firstAttribute="top" secondItem="ZZh-fw-LwK" secondAttribute="bottom" constant="10" id="NUL-Ta-VI8"/> <constraint firstItem="m5L-O7-P31" firstAttribute="top" secondItem="ZZh-fw-LwK" secondAttribute="bottom" constant="10" id="NUL-Ta-VI8"/>
<constraint firstItem="m5L-O7-P31" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="15" id="RFA-z1-9aB"/> <constraint firstItem="m5L-O7-P31" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="15" id="RFA-z1-9aB"/>
<constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="a3K-ri-NVs" secondAttribute="width" id="Rp6-Bh-BN3"/> <constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="a3K-ri-NVs" secondAttribute="width" id="Rp6-Bh-BN3"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="S0W-0G-75m"/> <constraint firstItem="6MG-gv-hD5" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="S0W-0G-75m"/>
<constraint firstItem="Cil-py-NiA" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" id="UNc-Et-9Yv"/>
<constraint firstItem="w7H-Sk-Rai" firstAttribute="leading" secondItem="wUL-9N-u1V" secondAttribute="trailing" id="VBM-8b-jP0"/> <constraint firstItem="w7H-Sk-Rai" firstAttribute="leading" secondItem="wUL-9N-u1V" secondAttribute="trailing" id="VBM-8b-jP0"/>
<constraint firstItem="VQn-bS-fWp" firstAttribute="top" secondItem="m5L-O7-P31" secondAttribute="bottom" constant="8" id="VpS-4N-mOo"/> <constraint firstItem="VQn-bS-fWp" firstAttribute="top" secondItem="m5L-O7-P31" secondAttribute="bottom" constant="8" id="VpS-4N-mOo"/>
<constraint firstItem="wUL-9N-u1V" firstAttribute="top" secondItem="2EB-m2-a3L" secondAttribute="bottom" constant="35" id="VpU-j2-gaE"/> <constraint firstItem="wUL-9N-u1V" firstAttribute="top" secondItem="2EB-m2-a3L" secondAttribute="bottom" constant="35" id="VpU-j2-gaE"/>
...@@ -175,10 +207,12 @@ ...@@ -175,10 +207,12 @@
<constraint firstItem="ZZh-fw-LwK" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" id="eIC-fZ-OEE"/> <constraint firstItem="ZZh-fw-LwK" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" id="eIC-fZ-OEE"/>
<constraint firstItem="976-fk-Kx2" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="fFg-pB-eyU"/> <constraint firstItem="976-fk-Kx2" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="fFg-pB-eyU"/>
<constraint firstItem="6Tk-OE-BBY" firstAttribute="bottom" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="40" id="fG6-0p-I0P"/> <constraint firstItem="6Tk-OE-BBY" firstAttribute="bottom" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="40" id="fG6-0p-I0P"/>
<constraint firstItem="Cil-py-NiA" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="gGK-DB-ibv"/>
<constraint firstItem="XpL-9M-UOp" firstAttribute="leading" secondItem="w7H-Sk-Rai" secondAttribute="trailing" id="guC-Db-cA9"/> <constraint firstItem="XpL-9M-UOp" firstAttribute="leading" secondItem="w7H-Sk-Rai" secondAttribute="trailing" id="guC-Db-cA9"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="leading" secondItem="avL-VK-Kha" secondAttribute="trailing" constant="10" id="jNW-iC-u7V"/> <constraint firstItem="6MG-gv-hD5" firstAttribute="leading" secondItem="avL-VK-Kha" secondAttribute="trailing" constant="10" id="jNW-iC-u7V"/>
<constraint firstItem="4ey-Xr-U4e" firstAttribute="bottom" secondItem="6Tk-OE-BBY" secondAttribute="bottom" id="o1X-q5-P7j"/> <constraint firstItem="4ey-Xr-U4e" firstAttribute="bottom" secondItem="6Tk-OE-BBY" secondAttribute="bottom" id="o1X-q5-P7j"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="top" secondItem="VQn-bS-fWp" secondAttribute="bottom" constant="8" id="tAE-ss-jlA"/> <constraint firstItem="6MG-gv-hD5" firstAttribute="top" secondItem="VQn-bS-fWp" secondAttribute="bottom" constant="8" id="tAE-ss-jlA"/>
<constraint firstItem="Cil-py-NiA" firstAttribute="leading" secondItem="ZZh-fw-LwK" secondAttribute="trailing" id="teJ-PP-h2R"/>
<constraint firstItem="4ey-Xr-U4e" firstAttribute="top" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="10" id="udc-wT-jqd"/> <constraint firstItem="4ey-Xr-U4e" firstAttribute="top" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="10" id="udc-wT-jqd"/>
<constraint firstItem="ZZh-fw-LwK" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" id="vXI-l2-CjL"/> <constraint firstItem="ZZh-fw-LwK" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" id="vXI-l2-CjL"/>
<constraint firstItem="VQn-bS-fWp" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="wtI-Dl-YPq"/> <constraint firstItem="VQn-bS-fWp" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="wtI-Dl-YPq"/>
...@@ -195,11 +229,81 @@ ...@@ -195,11 +229,81 @@
<outlet property="resultTextView" destination="VQn-bS-fWp" id="306-c7-3vM"/> <outlet property="resultTextView" destination="VQn-bS-fWp" id="306-c7-3vM"/>
<outlet property="selectImageView" destination="ZZh-fw-LwK" id="afR-Bv-6AW"/> <outlet property="selectImageView" destination="ZZh-fw-LwK" id="afR-Bv-6AW"/>
<outlet property="threadPickerView" destination="DlO-dk-RMr" id="Kk4-QV-b5o"/> <outlet property="threadPickerView" destination="DlO-dk-RMr" id="Kk4-QV-b5o"/>
<outlet property="videoView" destination="Cil-py-NiA" id="QY2-BP-SNS"/>
</connections> </connections>
</viewController> </viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/> <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
</objects> </objects>
<point key="canvasLocation" x="-724" y="98.50074962518741"/> <point key="canvasLocation" x="-1449" y="-3"/>
</scene>
<!--Load Pointer View Controller-->
<scene sceneID="56v-9i-I4d">
<objects>
<viewController id="4MS-jc-i6A" customClass="LoadPointerViewController" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="VbZ-nk-rJR">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" fixedFrame="YES" translatesAutoresizingMaskIntoConstraints="NO" id="2p5-S3-M4T">
<rect key="frame" x="16" y="63" width="240" height="128"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
</imageView>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="37q-nm-0H7">
<rect key="frame" x="38" y="610" width="42" height="30"/>
<constraints>
<constraint firstAttribute="height" constant="30" id="ofW-G3-KST"/>
<constraint firstAttribute="width" constant="42" id="pwd-tO-zcJ"/>
</constraints>
<state key="normal" title="Image"/>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="fAg-ai-yaA">
<rect key="frame" x="119" y="610" width="34" height="30"/>
<constraints>
<constraint firstAttribute="height" constant="30" id="IES-jf-Z1n"/>
<constraint firstAttribute="width" constant="34" id="jxK-Xn-WCE"/>
</constraints>
<state key="normal" title="Load"/>
<connections>
<action selector="loaderButtonPressed:" destination="4MS-jc-i6A" eventType="touchUpInside" id="3cy-PD-aiE"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="pdS-6e-Pd1">
<rect key="frame" x="185" y="610" width="49" height="30"/>
<constraints>
<constraint firstAttribute="width" constant="49" id="ddY-uM-fzA"/>
<constraint firstAttribute="height" constant="30" id="yKd-YL-UML"/>
</constraints>
<state key="normal" title="Predict"/>
<connections>
<action selector="predictButtonPressed:" destination="4MS-jc-i6A" eventType="touchUpInside" id="sOH-iT-s1w"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="DZa-sd-lY7">
<rect key="frame" x="279" y="610" width="34" height="30"/>
<constraints>
<constraint firstAttribute="width" constant="34" id="aSO-4q-PgA"/>
<constraint firstAttribute="height" constant="30" id="eAt-Uc-BxX"/>
</constraints>
<state key="normal" title="clear"/>
</button>
</subviews>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="37q-nm-0H7" secondAttribute="bottom" constant="27" id="4Wf-Uh-gvr"/>
<constraint firstItem="DZa-sd-lY7" firstAttribute="leading" secondItem="pdS-6e-Pd1" secondAttribute="trailing" constant="45" id="8dB-uI-cs9"/>
<constraint firstItem="fAg-ai-yaA" firstAttribute="leading" secondItem="37q-nm-0H7" secondAttribute="trailing" constant="39" id="EAV-Oq-jeD"/>
<constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="fAg-ai-yaA" secondAttribute="bottom" constant="27" id="Px0-A9-Eql"/>
<constraint firstItem="pdS-6e-Pd1" firstAttribute="leading" secondItem="fAg-ai-yaA" secondAttribute="trailing" constant="32" id="ZUR-Nv-aNb"/>
<constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="pdS-6e-Pd1" secondAttribute="bottom" constant="27" id="kPx-mt-ab9"/>
<constraint firstItem="37q-nm-0H7" firstAttribute="leading" secondItem="vsb-FH-h7h" secondAttribute="leading" constant="38" id="trH-Fq-sSv"/>
<constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="DZa-sd-lY7" secondAttribute="bottom" constant="27" id="yNJ-hq-2Qg"/>
</constraints>
<viewLayoutGuide key="safeArea" id="vsb-FH-h7h"/>
</view>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="hGb-Pb-icS" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="-721" y="-427"/>
</scene> </scene>
</scenes> </scenes>
<resources> <resources>
......
//
// LoadPointerViewController.h
// paddle-mobile-demo
//
// Created by Xiao,Haichun on 2018/9/19.
// Copyright © 2018年 orange. All rights reserved.
//
#import <UIKit/UIKit.h>
@interface LoadPointerViewController : UIViewController
@end
//
// LoadPointerViewController.m
// paddle-mobile-demo
//
// Created by Xiao,Haichun on 2018/9/19.
// Copyright © 2018年 orange. All rights reserved.
//
#import "LoadPointerViewController.h"
#import <Metal/Metal.h>
#import "paddle-mobile-demo-Bridging-Header.h"
@interface LoadPointerViewController ()
@property (strong, nonatomic) id<MTLDevice> device;
@property (strong, nonatomic) id<MTLTexture> texture;
@property (strong, nonatomic) id<MTLCommandQueue> queue;
@property (strong, nonatomic) PaddleMobileGPU *runner;
@property (strong, nonatomic) ModelConfig *modelConfig;
@end
@implementation LoadPointerViewController
- (void)viewDidLoad {
[super viewDidLoad];
self.device = MTLCreateSystemDefaultDevice();
self.queue = [self.device newCommandQueue];
// Do any additional setup after loading the view.
// NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"genet_model" withExtension:nil].path;
// NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"genet_params" withExtension:nil].path;
NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"ar_model" withExtension:nil].path;
NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"ar_params" withExtension:nil].path;
long fileSize;
FILE *fp;
fp = fopen([modelPath UTF8String], "rb");
fseek(fp, 0, SEEK_END);
fileSize = ftell(fp);
rewind(fp);
void *buffer = malloc(fileSize);
fread(buffer, 1, fileSize, fp);
fclose(fp);
long paramfileSize;
FILE *parmaFilePointer;
parmaFilePointer = fopen([paramPath UTF8String], "rb");
fseek(parmaFilePointer, 0, SEEK_END);
paramfileSize = ftell(parmaFilePointer);
rewind(parmaFilePointer);
void *parmaBuffer = malloc(paramfileSize);
fread(parmaBuffer, 1, paramfileSize, parmaFilePointer);
fclose(parmaFilePointer);
_modelConfig = [[ModelConfig alloc] init];
// _modelConfig.means = @[[NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0]];
// _modelConfig.scale = 0.017;
// _modelConfig.dims = @[[NSNumber numberWithFloat:1], [NSNumber numberWithFloat:128.], [NSNumber numberWithFloat:128.0],[NSNumber numberWithFloat:3.0]];
_modelConfig.means = @[[NSNumber numberWithFloat:103.94], [NSNumber numberWithFloat:116.78], [NSNumber numberWithFloat:123.68]];
_modelConfig.scale = 1;
_modelConfig.dims = @[[NSNumber numberWithFloat:1], [NSNumber numberWithFloat:160.], [NSNumber numberWithFloat:160.0],[NSNumber numberWithFloat:3.0]];
_modelConfig.modelPointer = buffer;
_modelConfig.modelSize = (int)fileSize;
_modelConfig.paramPointer = parmaBuffer;
_modelConfig.paramSize = (int)paramfileSize;
}
- (IBAction)loaderButtonPressed:(id)sender {
// _runner = [[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:GenetType modelConfig:_modelConfig];
_runner = [[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:MobileNetSSDType modelConfig:_modelConfig];
[_runner load];
}
- (IBAction)predictButtonPressed:(id)sender {
[self predict];
}
- (id<MTLTexture>) createTextureFromImage:(UIImage*) image device:(id<MTLDevice>) device
{
image =[UIImage imageWithCGImage:[image CGImage]
scale:[image scale]
orientation: UIImageOrientationLeft];
NSLog(@"orientation and size and stuff %ld %f %f", (long)image.imageOrientation, image.size.width, image.size.height);
CGImageRef imageRef = image.CGImage;
size_t width = self.view.frame.size.width;
size_t height = self.view.frame.size.height;
size_t bitsPerComponent = CGImageGetBitsPerComponent(imageRef);
size_t bitsPerPixel = CGImageGetBitsPerPixel(imageRef);
CGColorSpaceRef colorSpace = CGImageGetColorSpace(imageRef);
CGImageAlphaInfo alphaInfo = CGImageGetAlphaInfo(imageRef);
// NSLog(@"%@ %u", colorSpace, alphaInfo);
CGBitmapInfo bitmapInfo = kCGBitmapByteOrderDefault | alphaInfo;
// NSLog(@"bitmap info %u", bitmapInfo);
CGContextRef context = CGBitmapContextCreate( NULL, width, height, bitsPerComponent, (bitsPerPixel / 8) * width, colorSpace, bitmapInfo);
if( !context )
{
NSLog(@"Failed to load image, probably an unsupported texture type");
return nil;
}
CGContextDrawImage( context, CGRectMake( 0, 0, width, height ), image.CGImage);
MTLPixelFormat format = MTLPixelFormatRGBA8Unorm;
MTLTextureDescriptor *texDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format
width:width
height:height
mipmapped:NO];
id<MTLTexture> texture = [device newTextureWithDescriptor:texDesc];
[texture replaceRegion:MTLRegionMake2D(0, 0, width, height)
mipmapLevel:0
withBytes:CGBitmapContextGetData(context)
bytesPerRow:4 * width];
return texture;
}
- (void)predict {
_texture = [self createTextureFromImage:[UIImage imageNamed:@"hand.jpg"] device:self.device];
NSTimeInterval startTime = [[NSDate date] timeIntervalSince1970];
NSInteger max = 428;
for (int i = 0;i < max; i ++) {
[_runner predict:_texture withCompletion:^(BOOL success , NSArray<NSNumber *> *result) {
if (success) {
if (i == max -1) {
double time = [[NSDate date] timeIntervalSince1970] - startTime;
time = (time/max)*1000;
NSLog(@"gap ==== %fms",time);
}
// for (int i = 0; i < result.count; i ++) {
// NSNumber *number = result[i];
// NSLog(@"result %d = %f:",i, [number floatValue]);
// }
}
}];
}
}
- (void)didReceiveMemoryWarning {
[super didReceiveMemoryWarning];
// Dispose of any resources that can be recreated.
}
/*
#pragma mark - Navigation
// In a storyboard-based application, you will often want to do a little preparation before navigation
- (void)prepareForSegue:(UIStoryboardSegue *)segue sender:(id)sender {
// Get the new view controller using [segue destinationViewController].
// Pass the selected object to the new view controller.
}
*/
@end
//
// MetalHelper.swift
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/7/25.
// Copyright © 2018年 orange. All rights reserved.
//
import Metal
import MetalKit
import Foundation
import paddle_mobile
import MetalPerformanceShaders
class MetalHelper {
let device: MTLDevice
let queue: MTLCommandQueue
let textureLoader: MTKTextureLoader
static let shared: MetalHelper = MetalHelper.init()
private init(){
device = MTLCreateSystemDefaultDevice()!
queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device)
}
static func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) {
let tmpTextureDes = MTLTextureDescriptor.init()
tmpTextureDes.width = size.width
tmpTextureDes.height = size.height
tmpTextureDes.depth = 1
tmpTextureDes.usage = [.shaderRead, .shaderWrite]
tmpTextureDes.pixelFormat = .rgba32Float
tmpTextureDes.textureType = .type2D
tmpTextureDes.storageMode = .shared
tmpTextureDes.cpuCacheMode = .defaultCache
let dest = MetalHelper.shared.device.makeTexture(descriptor: tmpTextureDes)
let scale = MPSImageLanczosScale.init(device: MetalHelper.shared.device)
let buffer = queue.makeCommandBuffer()
scale.encode(commandBuffer: buffer!, sourceTexture: input, destinationTexture: dest!)
buffer?.addCompletedHandler({ (buffer) in
complete(dest!)
})
buffer?.commit()
}
}
//
// ModelHelper.swift
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/8/10.
// Copyright © 2018年 orange. All rights reserved.
//
import UIKit
import MetalKit
import Foundation
import paddle_mobile
import MetalPerformanceShaders
class PreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "preprocess", outputDim: s, usePaddleMobileLib: false)
}
}
let modelHelperMap: [SupportModel : ModelHelper] = [.mobilenet : MobileNetHelper.init()]
enum SupportModel: String{
case mobilenet = "mobilenet"
static func supportedModels() -> [SupportModel] {
return [.mobilenet]
}
}
protocol ModelHelper {
var dim: [Int] { get }
var modelPath: String { get }
var paramPath: String { get }
var modelDir: String { get }
var preprocessKernel: CusomKernel { get }
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void)
func resultStr(res: [Float]) -> String
}
extension ModelHelper {
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in
getTexture(resTexture)
}
}
}
struct MobileNetHelper: ModelHelper{
class PreWords {
var contents: [String] = []
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
}
}else{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String{
return contents[index]
}
}
let labels = PreWords.init(fileName: "synset")
func resultStr(res: [Float]) -> String {
var s: [String] = []
res.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
}
var preprocessKernel: CusomKernel
let dim = [1, 224, 224, 3]
let modelPath: String
let paramPath: String
let modelDir: String
init() {
modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = PreProccess.init(device: MetalHelper.shared.device)
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import UIKit
import paddle_mobile
class MultiPredictViewController: UIViewController {
var runner1: Runner!
var runner2: Runner!
override func viewDidLoad() {
super.viewDidLoad()
let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device)
let genet = Genet.init(device: MetalHelper.shared.device)
runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
let queue2 = MetalHelper.shared.device.makeCommandQueue()
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
}
@IBAction func predictAct(_ sender: Any) {
let success = self.runner2.load()
// DispatchQueue.global().async {
let image1 = UIImage.init(named: "hand.jpg")
// let success = self.runner2.load()
// if success {
// for i in 0..<10000 {
// print(i)
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result1: ")
//// print(res)
// })
// }
// } else {
// print("load failed")
// }
// self.runner1.clear()
// }
// return
// DispatchQueue.global().async {
//// sleep(1)
// let image1 = UIImage.init(named: "banana.jpeg")
//// if success {
// for _ in 0..<10 {
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result2: ")
// print(res)
// })
// }
//// } else {
//// print("load failed")
//// }
//// self.runner2.clear()
// }
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import MetalKit
import Foundation
import paddle_mobile
class MetalHelper {
let device: MTLDevice
let queue: MTLCommandQueue
let textureLoader: MTKTextureLoader
static let shared: MetalHelper = MetalHelper.init()
private init(){
device = MTLCreateSystemDefaultDevice()!
queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device)
}
}
//
// PaddleMobile.swift
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/9/5.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void mobilenet_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilenet_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilenet_ssd_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilenet_ssd_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void genet_preprocess(texture2d<float, access::read> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void genet_preprocess_half(texture2d<half, access::read> inTexture [[texture(0)]], texture2d<half, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilent_ar_preprocess(texture2d<float, access::read> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilent_ar_preprocess_half(texture2d<half, access::read> inTexture [[texture(0)]], texture2d<half, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void scale(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(input, gid);
}
kernel void scale_half(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<half, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(half4(input), gid);
}
//
// PreProcessKernel.metal
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/7/20.
// Copyright © 2018年 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
import Foundation
import QuartzCore
public class FPSCounter {
private(set) public var fps: Double = 0
var frames = 0
var startTime: CFTimeInterval = 0
public func start() {
frames = 0
startTime = CACurrentMediaTime()
}
public func frameCompleted() {
frames += 1
let now = CACurrentMediaTime()
let elapsed = now - startTime
if elapsed > 0.1 {
let current = Double(frames) / elapsed
let smoothing = 0.75
fps = smoothing*fps + (1 - smoothing)*current
if elapsed > 1 {
frames = 0
startTime = CACurrentMediaTime()
}
}
}
}
import UIKit
import Metal
import CoreVideo
import AVFoundation
@available(iOS 10.0, *)
@objc public protocol VideoCaptureDelegate: NSObjectProtocol {
@objc optional func videoCapture(_ capture: VideoCapture, didCaptureSampleBuffer sampleBuffer: CMSampleBuffer, timestamp: CMTime)
@objc optional func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime)
@objc optional func videoCapture(_ capture: VideoCapture, didCapturePhoto previewImage: UIImage?)
@objc optional func videoCapture(_ capture: VideoCapture, didCapturePhotoTexture texture: MTLTexture?)
}
/**
Simple interface to the iPhone's camera.
*/
@available(iOS 10.0, *)
public class VideoCapture: NSObject {
public var previewLayer: AVCaptureVideoPreviewLayer?
public weak var delegate: VideoCaptureDelegate?
public var fps = -1
private let device: MTLDevice?
private let videoOrientation: AVCaptureVideoOrientation
private var textureCache: CVMetalTextureCache?
private let captureSession = AVCaptureSession()
private let videoOutput = AVCaptureVideoDataOutput()
private let photoOutput = AVCapturePhotoOutput()
private let queue = DispatchQueue(label: "net.machinethink.camera-queue")
private var lastTimestamp = CMTime()
private let cameraPosition: AVCaptureDevice.Position
public init(device: MTLDevice? = nil, orientation: AVCaptureVideoOrientation = .portrait, position: AVCaptureDevice.Position = .back) {
self.device = device
self.videoOrientation = orientation
self.cameraPosition = position
super.init()
}
public func setUp(sessionPreset: AVCaptureSession.Preset = .medium,
completion: @escaping (Bool) -> Void) {
queue.async {
let success = self.setUpCamera(sessionPreset: sessionPreset)
DispatchQueue.main.async {
completion(success)
}
}
}
func fontCamera() -> AVCaptureDevice? {
let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices
return deveices.first
}
func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool {
if let inDevice = device{
guard CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, inDevice, nil, &textureCache) == kCVReturnSuccess else {
print("Error: could not create a texture cache")
return false
}
}
captureSession.beginConfiguration()
captureSession.sessionPreset = sessionPreset
var oCaptureDevice: AVCaptureDevice?
switch cameraPosition {
case .back:
oCaptureDevice = AVCaptureDevice.default(for: AVMediaType.video)
break
case .front:
oCaptureDevice = fontCamera()
break
default:
break
}
guard let captureDevice = oCaptureDevice else {
print("Error: no video devices available")
return false
}
guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else {
print("Error: could not create AVCaptureDeviceInput")
return false
}
if captureSession.canAddInput(videoInput) {
captureSession.addInput(videoInput)
}
let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect
previewLayer.connection?.videoOrientation = self.videoOrientation
self.previewLayer = previewLayer
let settings: [String : Any] = [
kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)
]
videoOutput.videoSettings = settings
videoOutput.alwaysDiscardsLateVideoFrames = true
videoOutput.setSampleBufferDelegate(self, queue: queue)
if captureSession.canAddOutput(videoOutput) {
captureSession.addOutput(videoOutput)
}
// We want the buffers to be in portrait orientation otherwise they are
// rotated by 90 degrees. Need to set this _after_ addOutput()!
videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation
if captureSession.canAddOutput(photoOutput) {
captureSession.addOutput(photoOutput)
}
captureSession.commitConfiguration()
return true
}
public func start() {
if !captureSession.isRunning {
captureSession.startRunning()
}
}
public func stop() {
if captureSession.isRunning {
captureSession.stopRunning()
}
}
/* Captures a single frame of the camera input. */
public func capturePhoto() {
let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)])
settings.previewPhotoFormat = [
kCVPixelBufferPixelFormatTypeKey as String: settings.__availablePreviewPhotoPixelFormatTypes[0],
kCVPixelBufferWidthKey as String: 480,
kCVPixelBufferHeightKey as String: 360,
]
photoOutput.capturePhoto(with: settings, delegate: self)
}
func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? {
if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
let width = CVPixelBufferGetWidth(imageBuffer)
let height = CVPixelBufferGetHeight(imageBuffer)
var texture: CVMetalTexture?
CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, textureCache, imageBuffer, nil, .bgra8Unorm, width, height, 0, &texture)
if let texture = texture {
return CVMetalTextureGetTexture(texture)
}
}
return nil
}
func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? {
if let sampleBuffer = sampleBuffer,
let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
let width = CVPixelBufferGetWidth(imageBuffer)
let height = CVPixelBufferGetHeight(imageBuffer)
let rect = CGRect(x: 0, y: 0, width: CGFloat(width), height: CGFloat(height))
let ciImage = CIImage(cvPixelBuffer: imageBuffer)
let ciContext = CIContext(options: nil)
if let cgImage = ciContext.createCGImage(ciImage, from: rect) {
return UIImage(cgImage: cgImage)
}
}
return nil
}
}
@available(iOS 10.0, *)
extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// Because lowering the capture device's FPS looks ugly in the preview,
// we capture at full speed but only call the delegate at its desired
// framerate. If `fps` is -1, we run at the full framerate.
let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
let deltaTime = timestamp - lastTimestamp
if fps == -1 || deltaTime >= CMTimeMake(1, Int32(fps)) {
lastTimestamp = timestamp
self.delegate?.videoCapture?(self, didCaptureSampleBuffer: sampleBuffer, timestamp: timestamp)
if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCaptureVideoTexture:timestamp:))) ?? false{
let texture = convertToMTLTexture(sampleBuffer: sampleBuffer)
delegate?.videoCapture?(self, didCaptureVideoTexture: texture, timestamp: timestamp)
}
}
}
public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
print("dropped frame")
}
}
@available(iOS 10.0, *)
extension VideoCapture: AVCapturePhotoCaptureDelegate {
public func photoOutput(_ captureOutput: AVCapturePhotoOutput,
didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?,
previewPhoto previewPhotoSampleBuffer: CMSampleBuffer?,
resolvedSettings: AVCaptureResolvedPhotoSettings,
bracketSettings: AVCaptureBracketedStillImageSettings?,
error: Error?) {
var imageTexture: MTLTexture?
var previewImage: UIImage?
if error == nil {
if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhotoTexture:))) ?? false{
imageTexture = convertToMTLTexture(sampleBuffer: photoSampleBuffer)
self.delegate?.videoCapture?(self, didCapturePhotoTexture: imageTexture)
}
if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhoto:))) ?? false{
previewImage = convertToUIImage(sampleBuffer: previewPhotoSampleBuffer)
self.delegate?.videoCapture?(self, didCapturePhoto: previewImage)
}
}
}
}
...@@ -14,164 +14,292 @@ ...@@ -14,164 +14,292 @@
import UIKit import UIKit
import MetalKit import MetalKit
import CoreMedia
import paddle_mobile import paddle_mobile
import MetalPerformanceShaders import MetalPerformanceShaders
let threadSupport = [1] var platform: Platform = .GPU
let threadSupport: [(Platform, String)] = [(.GPU, "GPU"), (.CPU, "CPU")]
//.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform),
let modelHelperMap: [SupportModel : Runner] = [
.genet : Runner.init(inNet: Genet.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform),
.mobilenet_ssd_ar : Runner.init(inNet: MobileNet_ssd_AR.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform)]
//, .genet : Genet.init()
//let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()]
let netSupport: [SupportModel : Net] = [.genet : Genet.init(device: MetalHelper.shared.device), .mobilenet_ssd_ar : MobileNet_ssd_AR.init(device: MetalHelper.shared.device)]
enum SupportModel: String{
// case mobilenet = "mobilenet"
// case mobilenet_ssd = "mobilenetssd"
case genet = "genet"
case mobilenet_ssd_ar = "mobilenetssd_ar"
static func supportedModels() -> [SupportModel] {
// .mobilenet,
// .mobilenet_ssd,
return [.genet, .mobilenet_ssd_ar]
}
}
class ViewController: UIViewController { class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView! @IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView! @IBOutlet weak var selectImageView: UIImageView!
@IBOutlet weak var elapsedTimeLabel: UILabel! @IBOutlet weak var elapsedTimeLabel: UILabel!
@IBOutlet weak var modelPickerView: UIPickerView! @IBOutlet weak var modelPickerView: UIPickerView!
@IBOutlet weak var threadPickerView: UIPickerView! @IBOutlet weak var threadPickerView: UIPickerView!
var selectImage: UIImage? @IBOutlet weak var videoView: UIView!
var program: Program? // var videoCapture: VideoCapture!
var executor: Executor<Float32>?
var modelType: SupportModel = .mobilenet var selectImage: UIImage?
var toPredictTexture: MTLTexture? var inputPointer: UnsafeMutablePointer<Float32>?
var modelHelper: ModelHelper { var modelType: SupportModel = SupportModel.supportedModels()[0]
return modelHelperMap[modelType] ?! " has no this type " var toPredictTexture: MTLTexture?
}
var threadNum = 1 var runner: Runner!
var threadNum = 1
@IBAction func loadAct(_ sender: Any) {
runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue, inPlatform: platform)
@IBAction func loadAct(_ sender: Any) { if platform == .CPU {
let inModelHelper = modelHelper if inputPointer == nil {
let queue = MetalHelper.shared.queue inputPointer = runner.preproccess(image: selectImage!.cgImage!)
let loader = Loader<Float32>.init()
do { }
let modelPath = inModelHelper.modelPath } else if platform == .GPU {
let paraPath = inModelHelper.paramPath if self.toPredictTexture == nil {
runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath) self?.toPredictTexture = texture
executor = try Executor<Float32>.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!)
} catch let error {
print(error)
} }
}
} else {
fatalError( " unsupport " )
} }
@IBAction func selectImageAct(_ sender: Any) { if runner.load() {
let imagePicker = UIImagePickerController() print(" load success ! ")
imagePicker.sourceType = .camera } else {
imagePicker.delegate = self print(" load error ! ")
self.present(imagePicker, animated: true, completion: nil)
}
@IBAction func clearAct(_ sender: Any) {
executor?.clear()
program = nil
executor = nil
} }
}
@IBAction func predictAct(_ sender: Any) {
guard let inTexture = toPredictTexture else { @IBAction func selectImageAct(_ sender: Any) {
resultTextView.text = "请选择图片 ! " let imagePicker = UIImagePickerController()
return imagePicker.sourceType = .camera
imagePicker.delegate = self
self.present(imagePicker, animated: true, completion: nil)
}
@IBAction func clearAct(_ sender: Any) {
runner.clear()
}
@IBAction func predictAct(_ sender: Any) {
let max = 50
switch platform {
case .GPU:
guard let inTexture = toPredictTexture else {
resultTextView.text = "请选择图片 ! "
return
}
for _ in 0..<10{
runner.predict(texture: inTexture) { (success, resultHolder) in
resultHolder?.releasePointer()
}
}
let startDate = Date.init()
for i in 0..<max {
runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
guard let sSelf = self else {
fatalError()
}
if success {
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
// print(resultHolder!.result![0])
sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: resultHolder!)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
}
}
}
DispatchQueue.main.async {
resultHolder?.releasePointer()
}
// print("释放")
} }
// print("sleep before ")
guard let inExecutor = executor else { // usleep(33000)
resultTextView.text = "请先 load ! " // print("sleep after ")
return }
case .CPU:
guard let inInputPointer = inputPointer else {
fatalError( " need input pointer " )
}
for _ in 0..<10 {
runner.predict(inputPointer: inInputPointer) { (success, res) in
res?.releaseOutput()
} }
}
do {
let max = 100 let startDate = Date.init()
var startDate = Date.init() for i in 0..<max {
for i in 0..<max { runner.predict(inputPointer: inInputPointer) { [weak self](success, res) in
try inExecutor.predict(input: inTexture, expect: modelHelper.dim, completionHandle: { [weak self] (result) in guard let sSelf = self else {
guard let sSelf = self else { fatalError()
fatalError() }
} if success {
if i == max - 1 {
if i == (max / 2 - 1) { let time = Date.init().timeIntervalSince(startDate)
startDate = Date.init() DispatchQueue.main.async {
} // sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: res)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
if i == max - 1 { }
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
sSelf.resultTextView.text = sSelf.modelHelper.resultStr(res: result.resultArr)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max/2) * 1000.0) ms"
}
}
}, preProcessKernle: self.modelHelper.preprocessKernel)
} }
} catch let error { }
print(error) res?.releaseOutput()
} }
}
} }
}
override func viewDidLoad() {
super.viewDidLoad()
// if runner.load() {
// print(" load success ! ")
// } else {
// print(" load error ! ")
// }
//
modelPickerView.delegate = self
modelPickerView.dataSource = self
threadPickerView.delegate = self
threadPickerView.dataSource = self
selectImage = UIImage.init(named: "hand.jpg")
selectImageView.image = selectImage
// if platform == .CPU {
// inputPointer = runner.preproccess(image: selectImage!.cgImage!)
// } else if platform == .GPU {
// runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// self?.toPredictTexture = texture
// }
// } else {
// fatalError( " unsupport " )
// }
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
override func viewDidLoad() { }
super.viewDidLoad()
modelPickerView.delegate = self
modelPickerView.dataSource = self
threadPickerView.delegate = self
threadPickerView.dataSource = self
selectImage = UIImage.init(named: "banana.jpeg")
selectImageView.image = selectImage
modelHelper.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
self?.toPredictTexture = texture
}
}
} }
extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
func numberOfComponents(in pickerView: UIPickerView) -> Int { func numberOfComponents(in pickerView: UIPickerView) -> Int {
if pickerView == modelPickerView { if pickerView == modelPickerView {
return 1 return 1
} else if pickerView == threadPickerView { } else if pickerView == threadPickerView {
return 1 return 1
} else { } else {
fatalError() fatalError()
}
} }
}
func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
if pickerView == modelPickerView { func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
return SupportModel.supportedModels().count if pickerView == modelPickerView {
} else if pickerView == threadPickerView { return SupportModel.supportedModels().count
return threadSupport.count } else if pickerView == threadPickerView {
} else { return threadSupport.count
fatalError() } else {
} fatalError()
} }
}
public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
if pickerView == modelPickerView { public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
return SupportModel.supportedModels()[row].rawValue if pickerView == modelPickerView {
} else if pickerView == threadPickerView { return SupportModel.supportedModels()[row].rawValue
return "\(threadSupport[row])" } else if pickerView == threadPickerView {
} else { return threadSupport[row].1
fatalError() } else {
} fatalError()
} }
}
public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
if pickerView == modelPickerView { public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
self.modelType = SupportModel.supportedModels()[row] if pickerView == modelPickerView {
} else if pickerView == threadPickerView { self.modelType = SupportModel.supportedModels()[row]
self.threadNum = threadSupport[row] } else if pickerView == threadPickerView {
} else {
fatalError() platform = threadSupport[row].0
} } else {
fatalError()
} }
}
} }
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate { extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
picker.dismiss(animated: true){[weak self] in picker.dismiss(animated: true){[weak self] in
guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else{ guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else{
fatalError("no image") fatalError("no image")
} }
sSelf.selectImage = image sSelf.selectImage = image
sSelf.selectImageView.image = image sSelf.selectImageView.image = image
sSelf.modelHelper.getTexture(image: image.cgImage!, getTexture: { (texture) in sSelf.runner.getTexture(image: image.cgImage!, getTexture: { (texture) in
sSelf.toPredictTexture = texture sSelf.toPredictTexture = texture
}) })
}
} }
}
} }
var bool1 = false
extension ViewController: VideoCaptureDelegate{
func predictTexture(texture: MTLTexture){
runner.scaleTexture(input: texture) { (scaledTexture) in
self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in
// print(resultHolder!.result![0])
resultHolder?.releasePointer()
})
}
}
// @available(iOS 10.0, *)
// func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) {
//// if !bool1 {
//// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) {
// self.predictTexture(texture: texture!)
//// }
//
//
//// bool1 = true
//// }
//
// }
}
//
// Use this file to import your target's public headers that you would like to expose to Swift.
//
#import <paddle_mobile/paddle_mobile.h>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>paddle-mobile-unit-test.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>6</integer>
</dict>
</dict>
</dict>
</plist>
// /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// AppDelegate.swift
// paddle-mobile-unit-test Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Created by liuRuiLong on 2018/8/10. You may obtain a copy of the License at
// Copyright © 2018年 orange. All rights reserved.
// http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import UIKit import UIKit
......
// /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// ViewController.swift
// paddle-mobile-unit-test Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Created by liuRuiLong on 2018/8/10. You may obtain a copy of the License at
// Copyright © 2018年 orange. All rights reserved.
// http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import UIKit import UIKit
import Metal
//import MetalKit
import paddle_mobile import paddle_mobile
class ViewController: UIViewController { class ViewController: UIViewController {
override func viewDidLoad() { override func viewDidLoad() {
super.viewDidLoad() super.viewDidLoad()
let device = Metal.MTLCreateSystemDefaultDevice()!
let queue = device.makeCommandQueue()!
let test = PaddleMobileUnitTest.init(
inDevice: device,
inQueue: queue
)
test.testConcat()
// test.testReshape()
// test.testTranspose()
print(" done ") print(" done ")
} }
......
...@@ -7,7 +7,31 @@ ...@@ -7,7 +7,31 @@
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; };
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; };
4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; };
4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8B2146640900D0F791 /* SplitOp.swift */; };
4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */; };
4AA1EA90214664CD00D0F791 /* Split.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8F214664CD00D0F791 /* Split.metal */; };
4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA91214665D700D0F791 /* ShapeOp.swift */; };
4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA932146661500D0F791 /* ShapeKernel.swift */; };
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA972146666500D0F791 /* FlattenOp.swift */; };
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */ = {isa = PBXBuildFile; fileRef = 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */; };
4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */; };
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */; };
4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */; };
4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA5214B5F6800D0F791 /* Shape.metal */; };
4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */; };
4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */; };
4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */; };
4AA1EAAE214F5FD900D0F791 /* TransposeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */; };
4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928762133F1DB005B6C3A /* BoxCoder.metal */; };
4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; };
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928812135673D005B6C3A /* ConcatKernel.metal */; };
4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9288321357BE3005B6C3A /* Elementwise.metal */; };
D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; }; D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; };
FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC0226552138F33800F395E2 /* TransposeKernel.metal */; };
FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC0226572138F38D00F395E2 /* PoolKernel.metal */; };
FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; }; FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; };
FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9520E11C9A0081E9F8 /* Errors.swift */; }; FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9520E11C9A0081E9F8 /* Errors.swift */; };
...@@ -35,17 +59,54 @@ ...@@ -35,17 +59,54 @@
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; }; FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; }; FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; };
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; }; FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; };
FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1B186520ECF1C600678B91 /* ResizeKernel.swift */; }; FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */ = {isa = PBXBuildFile; fileRef = FC292C5521421B4600CF622F /* PaddleMobileGPU.m */; };
FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7C214255BC00CF622F /* CPUCompute.mm */; };
FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7E214255BC00CF622F /* MobileNetSSD.swift */; };
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C7D214255BC00CF622F /* CPUCompute.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC292C872142624800CF622F /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C862142624800CF622F /* Genet.swift */; };
FC33B0F02147659000714A93 /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC33B0EF2147659000714A93 /* MobileNet.swift */; };
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; }; FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; };
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; }; FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; }; FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */; };
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; };
FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97D2140F2C30073E130 /* libstdc++.tbd */; };
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; }; FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; }; FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; };
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */; };
FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */; };
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */; };
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */; };
FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC6214CBA820094B8E5 /* Macro.metal */; };
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */; };
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; }; FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; };
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */; };
FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; }; FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; };
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; }; FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; };
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; }; FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; };
FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; }; FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; };
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */; };
FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1642132A5EB00084FE5 /* Common.metal */; };
FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */; };
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD42138272900BD58AA /* ConvAddMetal.metal */; };
FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; };
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; };
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */; };
FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */; };
FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */; };
FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */; };
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC642122FCD700D94F7E /* TransposeOp.swift */; };
FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC66212306B000D94F7E /* ConcatOp.swift */; };
FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC68212306D300D94F7E /* ConcatKernel.swift */; };
FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */; };
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; };
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; };
FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; };
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; }; FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; };
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; }; FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; };
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; }; FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; };
...@@ -55,15 +116,55 @@ ...@@ -55,15 +116,55 @@
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; }; FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; };
FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; }; FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; };
FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; }; FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; };
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */; };
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */; };
FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */; };
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; };
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; };
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; };
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */; };
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */; };
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */; };
FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */; };
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */; };
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */; };
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B6214F869000B520C3 /* Net.swift */; };
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; };
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; }; FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; }; FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; }; FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = "<group>"; };
4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpKernel.swift; sourceTree = "<group>"; };
4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = "<group>"; };
4AA1EA8B2146640900D0F791 /* SplitOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SplitOp.swift; sourceTree = "<group>"; };
4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SplitKernel.swift; sourceTree = "<group>"; };
4AA1EA8F214664CD00D0F791 /* Split.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Split.metal; sourceTree = "<group>"; };
4AA1EA91214665D700D0F791 /* ShapeOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ShapeOp.swift; sourceTree = "<group>"; };
4AA1EA932146661500D0F791 /* ShapeKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ShapeKernel.swift; sourceTree = "<group>"; };
4AA1EA972146666500D0F791 /* FlattenOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FlattenOp.swift; sourceTree = "<group>"; };
4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; fileEncoding = 4; path = ConcatKernel.inc.metal; sourceTree = "<group>"; };
4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; fileEncoding = 4; path = ReshapeKernel.inc.metal; sourceTree = "<group>"; };
4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FlattenKernel.swift; sourceTree = "<group>"; };
4AA1EAA3214A295C00D0F791 /* Split.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Split.inc.metal; sourceTree = "<group>"; };
4AA1EAA5214B5F6800D0F791 /* Shape.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Shape.metal; sourceTree = "<group>"; };
4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.inc.metal; sourceTree = "<group>"; };
4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.inc.metal; sourceTree = "<group>"; };
4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.inc.metal; sourceTree = "<group>"; };
4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = TransposeKernel.inc.metal; sourceTree = "<group>"; };
4AF928762133F1DB005B6C3A /* BoxCoder.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.metal; sourceTree = "<group>"; };
4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = "<group>"; };
4AF928812135673D005B6C3A /* ConcatKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = ConcatKernel.metal; sourceTree = "<group>"; };
4AF9288321357BE3005B6C3A /* Elementwise.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Elementwise.metal; sourceTree = "<group>"; };
CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = "<group>"; }; CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = "<group>"; };
DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = "<group>"; }; E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = "<group>"; };
FC0226552138F33800F395E2 /* TransposeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = TransposeKernel.metal; sourceTree = "<group>"; };
FC0226572138F38D00F395E2 /* PoolKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PoolKernel.metal; sourceTree = "<group>"; };
FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = paddle_mobile.h; sourceTree = "<group>"; }; FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = paddle_mobile.h; sourceTree = "<group>"; };
FC039B6E20E11C3C0081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; FC039B6E20E11C3C0081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
...@@ -93,17 +194,54 @@ ...@@ -93,17 +194,54 @@
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; }; FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; }; FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; };
FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; }; FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; };
FC1B186520ECF1C600678B91 /* ResizeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ResizeKernel.swift; sourceTree = "<group>"; }; FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileGPU.h; sourceTree = "<group>"; };
FC292C5521421B4600CF622F /* PaddleMobileGPU.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = PaddleMobileGPU.m; sourceTree = "<group>"; };
FC292C7C214255BC00CF622F /* CPUCompute.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CPUCompute.mm; sourceTree = "<group>"; };
FC292C7D214255BC00CF622F /* CPUCompute.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = "<group>"; };
FC292C7E214255BC00CF622F /* MobileNetSSD.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = "<group>"; };
FC292C862142624800CF622F /* Genet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = "<group>"; };
FC33B0EF2147659000714A93 /* MobileNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; }; FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; };
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; }; FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; }; FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = "<group>"; };
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileCPU.h; sourceTree = "<group>"; };
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = "<group>"; };
FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; }; FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; }; FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; };
FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluOp.swift; sourceTree = "<group>"; };
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluKernel.swift; sourceTree = "<group>"; };
FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPreluKernel.metal; sourceTree = "<group>"; };
FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPrelu.inc.metal; sourceTree = "<group>"; };
FC803BC6214CBA820094B8E5 /* Macro.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Macro.metal; sourceTree = "<group>"; };
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.metal; sourceTree = "<group>"; };
FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; }; FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; };
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = "<group>"; };
FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; }; FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; };
FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; }; FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; };
FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; }; FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; };
FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = "<group>"; }; FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = "<group>"; };
FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReshapeKernel.metal; sourceTree = "<group>"; };
FCA3A1642132A5EB00084FE5 /* Common.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Common.metal; sourceTree = "<group>"; };
FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvTransposeKernel.metal; sourceTree = "<group>"; };
FCA67CD42138272900BD58AA /* ConvAddMetal.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddMetal.metal; sourceTree = "<group>"; };
FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; };
FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; };
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DepthwiseConvOp.swift; sourceTree = "<group>"; };
FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PriorBoxOp.swift; sourceTree = "<group>"; };
FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PriorBoxKernel.swift; sourceTree = "<group>"; };
FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TransposeKernel.swift; sourceTree = "<group>"; };
FCBCCC642122FCD700D94F7E /* TransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TransposeOp.swift; sourceTree = "<group>"; };
FCBCCC66212306B000D94F7E /* ConcatOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConcatOp.swift; sourceTree = "<group>"; };
FCBCCC68212306D300D94F7E /* ConcatKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConcatKernel.swift; sourceTree = "<group>"; };
FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderOp.swift; sourceTree = "<group>"; };
FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = "<group>"; };
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = "<group>"; };
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = "<group>"; };
FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; }; FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; };
FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; }; FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; };
FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; }; FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; };
...@@ -113,9 +251,25 @@ ...@@ -113,9 +251,25 @@
FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; }; FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; };
FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; }; FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; };
FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; }; FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; };
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluKernel.swift; sourceTree = "<group>"; };
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeKernel.swift; sourceTree = "<group>"; };
FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = BatchNormKernel.metal; sourceTree = "<group>"; };
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = "<group>"; };
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = "<group>"; };
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = "<group>"; };
FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluOp.swift; sourceTree = "<group>"; };
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluKernel.swift; sourceTree = "<group>"; };
FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluOp.swift; sourceTree = "<group>"; };
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluKernel.swift; sourceTree = "<group>"; };
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.inc.metal; sourceTree = "<group>"; };
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.metal; sourceTree = "<group>"; };
FCE9D7B6214F869000B520C3 /* Net.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = "<group>"; };
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = "<group>"; };
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; }; FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; }; FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; }; FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
...@@ -123,7 +277,9 @@ ...@@ -123,7 +277,9 @@
isa = PBXFrameworksBuildPhase; isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */,
D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */, D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */,
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
...@@ -133,6 +289,7 @@ ...@@ -133,6 +289,7 @@
336CBE234BF5DE48658DE65F /* Frameworks */ = { 336CBE234BF5DE48658DE65F /* Frameworks */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC4FD97D2140F2C30073E130 /* libstdc++.tbd */,
DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */, DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */,
); );
name = Frameworks; name = Frameworks;
...@@ -168,10 +325,19 @@ ...@@ -168,10 +325,19 @@
FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = { FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCE9D7B6214F869000B520C3 /* Net.swift */,
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */,
FC33B0EF2147659000714A93 /* MobileNet.swift */,
FC292C862142624800CF622F /* Genet.swift */,
FC292C7E214255BC00CF622F /* MobileNetSSD.swift */,
FC292C7C214255BC00CF622F /* CPUCompute.mm */,
FC292C7D214255BC00CF622F /* CPUCompute.h */,
FC292C5521421B4600CF622F /* PaddleMobileGPU.m */,
FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */,
FC4FD9762140E4920073E130 /* CPU */,
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */,
FC039BAE20E11CC20081E9F8 /* Program */, FC039BAE20E11CC20081E9F8 /* Program */,
FC039BA320E11CBC0081E9F8 /* Operators */, FC039BA320E11CBC0081E9F8 /* Operators */,
FC039BA120E11CB70081E9F8 /* Loader.swift */,
FC039B9A20E11CA00081E9F8 /* Executor.swift */,
FC039B9C20E11CB20081E9F8 /* framework */, FC039B9C20E11CB20081E9F8 /* framework */,
FC039B9320E11C9A0081E9F8 /* Common */, FC039B9320E11C9A0081E9F8 /* Common */,
FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */, FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */,
...@@ -196,6 +362,8 @@ ...@@ -196,6 +362,8 @@
FC039B9C20E11CB20081E9F8 /* framework */ = { FC039B9C20E11CB20081E9F8 /* framework */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC039BA120E11CB70081E9F8 /* Loader.swift */,
FC039B9A20E11CA00081E9F8 /* Executor.swift */,
FC039B9D20E11CB20081E9F8 /* Tensor.swift */, FC039B9D20E11CB20081E9F8 /* Tensor.swift */,
FC039B9E20E11CB20081E9F8 /* Dim.swift */, FC039B9E20E11CB20081E9F8 /* Dim.swift */,
FC9D038320E23B01000F735A /* Texture.swift */, FC9D038320E23B01000F735A /* Texture.swift */,
...@@ -219,6 +387,23 @@ ...@@ -219,6 +387,23 @@
FCD04E6920F319EC0007374F /* SoftmaxOp.swift */, FCD04E6920F319EC0007374F /* SoftmaxOp.swift */,
FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */, FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */,
FCD04E7120F343420007374F /* ConvAddOp.swift */, FCD04E7120F343420007374F /* ConvAddOp.swift */,
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */,
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */,
FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */,
FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */,
FCBCCC642122FCD700D94F7E /* TransposeOp.swift */,
FCBCCC66212306B000D94F7E /* ConcatOp.swift */,
FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */,
4AA1EA8B2146640900D0F791 /* SplitOp.swift */,
4AA1EA91214665D700D0F791 /* ShapeOp.swift */,
4AA1EA972146666500D0F791 /* FlattenOp.swift */,
4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */,
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */,
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */,
FCEB684B212F093800D2448E /* PreluOp.swift */,
FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */,
FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */,
FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */,
); );
path = Operators; path = Operators;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -243,24 +428,46 @@ ...@@ -243,24 +428,46 @@
FC086BA520E67E8500D85EF7 /* Kernels */ = { FC086BA520E67E8500D85EF7 /* Kernels */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCDDC6CD212FE02100E5EF74 /* Base */,
FCEB6837212F00B100D2448E /* metal */,
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */,
FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */, FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */,
FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
FC1B186520ECF1C600678B91 /* ResizeKernel.swift */,
FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */, FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */,
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */, FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */,
FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */, FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */,
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */, FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */,
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */, FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */,
FCD04E6720F315020007374F /* PoolKernel.swift */, FCD04E6720F315020007374F /* PoolKernel.swift */,
FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */, FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */,
FCD04E6F20F31B720007374F /* ReshapeKernel.swift */, FCD04E6F20F31B720007374F /* ReshapeKernel.swift */,
4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */,
FCD04E7320F3437E0007374F /* ConvAddKernel.swift */, FCD04E7320F3437E0007374F /* ConvAddKernel.swift */,
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */,
FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */,
FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */,
FCBCCC68212306D300D94F7E /* ConcatKernel.swift */,
FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */,
4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */,
4AA1EA932146661500D0F791 /* ShapeKernel.swift */,
4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */,
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */,
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */,
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */,
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */,
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */,
); );
path = Kernels; path = Kernels;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC4FD9762140E4920073E130 /* CPU */ = {
isa = PBXGroup;
children = (
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */,
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */,
);
path = CPU;
sourceTree = "<group>";
};
FCD592FA20E248EC00252966 /* Base */ = { FCD592FA20E248EC00252966 /* Base */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
...@@ -271,6 +478,56 @@ ...@@ -271,6 +478,56 @@
path = Base; path = Base;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FCDDC6CD212FE02100E5EF74 /* Base */ = {
isa = PBXGroup;
children = (
FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
);
path = Base;
sourceTree = "<group>";
};
FCEB6837212F00B100D2448E /* metal */ = {
isa = PBXGroup;
children = (
4AF928812135673D005B6C3A /* ConcatKernel.metal */,
4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */,
4AF9288321357BE3005B6C3A /* Elementwise.metal */,
FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
4AF928762133F1DB005B6C3A /* BoxCoder.metal */,
4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */,
4AA1EAA5214B5F6800D0F791 /* Shape.metal */,
4AA1EA8F214664CD00D0F791 /* Split.metal */,
4AA1EAA3214A295C00D0F791 /* Split.inc.metal */,
4AA1EA892146631C00D0F791 /* BilinearInterp.metal */,
4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */,
4AF9287821341661005B6C3A /* Softmax.metal */,
4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */,
FCEB6849212F00DB00D2448E /* PreluKernel.metal */,
FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */,
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */,
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */,
FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */,
4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */,
FCA3A1642132A5EB00084FE5 /* Common.metal */,
FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */,
FCA67CD42138272900BD58AA /* ConvAddMetal.metal */,
FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */,
FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */,
FC0226552138F33800F395E2 /* TransposeKernel.metal */,
4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */,
FC0226572138F38D00F395E2 /* PoolKernel.metal */,
FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */,
FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */,
FC803BC6214CBA820094B8E5 /* Macro.metal */,
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */,
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */,
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */,
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */,
);
path = metal;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */ /* Begin PBXHeadersBuildPhase section */
...@@ -278,6 +535,10 @@ ...@@ -278,6 +535,10 @@
isa = PBXHeadersBuildPhase; isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */,
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */,
FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */,
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */,
FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */, FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
...@@ -315,6 +576,7 @@ ...@@ -315,6 +576,7 @@
TargetAttributes = { TargetAttributes = {
FC039B6920E11C3C0081E9F8 = { FC039B6920E11C3C0081E9F8 = {
CreatedOnToolsVersion = 9.3.1; CreatedOnToolsVersion = 9.3.1;
LastSwiftMigration = 0940;
}; };
}; };
}; };
...@@ -372,53 +634,124 @@ ...@@ -372,53 +634,124 @@
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */, FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */,
4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */,
FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */, FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */,
FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */,
4AF9287921341661005B6C3A /* Softmax.metal in Sources */,
4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */,
FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */, FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */,
FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */, FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */,
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */,
FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */,
4AA1EAAE214F5FD900D0F791 /* TransposeKernel.inc.metal in Sources */,
4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */,
FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */,
FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */, FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */,
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */,
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */, FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */, FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */, FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */, FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */,
FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */,
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */,
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */,
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */,
FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */,
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */, FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */,
FC33B0F02147659000714A93 /* MobileNet.swift in Sources */,
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */,
4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */,
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */,
FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */, FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */,
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */, FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */,
4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */,
FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */,
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */, FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */, FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */, FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */, FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */,
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */,
FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */, FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
FC9D038420E23B01000F735A /* Texture.swift in Sources */, FC9D038420E23B01000F735A /* Texture.swift in Sources */,
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */,
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */,
FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */, FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */, FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */,
FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */, FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */,
4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */,
FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */, FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */,
FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */, FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */,
FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */,
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */, FCD04E6620F314C50007374F /* PoolOp.swift in Sources */,
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */,
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */, FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */, FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
FC292C872142624800CF622F /* Genet.swift in Sources */,
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */,
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */,
FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */,
FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */,
FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */, FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */,
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */, FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */,
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */,
FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */,
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */,
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */, FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */,
4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */,
FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */,
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */,
FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */,
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */, FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */, FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */, FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */,
FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */,
FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */,
FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */,
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */, FC9D038220E2312E000F735A /* FetchOp.swift in Sources */,
FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */,
FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */, FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */,
FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */, FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */,
FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */, FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */,
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */,
4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */,
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */,
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */,
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */, FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */,
4AA1EA90214664CD00D0F791 /* Split.metal in Sources */,
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */, FCD04E6820F315020007374F /* PoolKernel.swift in Sources */,
FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */,
FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */, FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */,
FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */, FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */,
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */,
FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */, FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
...@@ -550,6 +883,7 @@ ...@@ -550,6 +883,7 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
baseConfigurationReference = CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */; baseConfigurationReference = CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */;
buildSettings = { buildSettings = {
CLANG_ENABLE_MODULES = YES;
CODE_SIGN_IDENTITY = ""; CODE_SIGN_IDENTITY = "";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEFINES_MODULE = YES; DEFINES_MODULE = YES;
...@@ -557,6 +891,7 @@ ...@@ -557,6 +891,7 @@
DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1; DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath"; DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_BITCODE = NO;
INFOPLIST_FILE = "paddle-mobile/Info.plist"; INFOPLIST_FILE = "paddle-mobile/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
...@@ -565,10 +900,16 @@ ...@@ -565,10 +900,16 @@
"@executable_path/Frameworks", "@executable_path/Frameworks",
"@loader_path/Frameworks", "@loader_path/Frameworks",
); );
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)/paddle-mobile/CPU",
);
MACH_O_TYPE = mh_dylib;
MTL_LANGUAGE_REVISION = UseDeploymentTarget; MTL_LANGUAGE_REVISION = UseDeploymentTarget;
PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
SKIP_INSTALL = YES; SKIP_INSTALL = YES;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
SWIFT_VERSION = 4.0; SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2"; TARGETED_DEVICE_FAMILY = "1,2";
}; };
...@@ -578,6 +919,7 @@ ...@@ -578,6 +919,7 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
baseConfigurationReference = E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */; baseConfigurationReference = E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */;
buildSettings = { buildSettings = {
CLANG_ENABLE_MODULES = YES;
CODE_SIGN_IDENTITY = ""; CODE_SIGN_IDENTITY = "";
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Automatic;
DEFINES_MODULE = YES; DEFINES_MODULE = YES;
...@@ -585,6 +927,7 @@ ...@@ -585,6 +927,7 @@
DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1; DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath"; DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_BITCODE = NO;
INFOPLIST_FILE = "paddle-mobile/Info.plist"; INFOPLIST_FILE = "paddle-mobile/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
...@@ -593,6 +936,11 @@ ...@@ -593,6 +936,11 @@
"@executable_path/Frameworks", "@executable_path/Frameworks",
"@loader_path/Frameworks", "@loader_path/Frameworks",
); );
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)/paddle-mobile/CPU",
);
MACH_O_TYPE = mh_dylib;
MTL_LANGUAGE_REVISION = UseDeploymentTarget; MTL_LANGUAGE_REVISION = UseDeploymentTarget;
PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
......
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "0940"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</MacroExpansion>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>paddle-mobile.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>0</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>FC039B6920E11C3C0081E9F8</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#import <CoreImage/CoreImage.h>
#import <Foundation/Foundation.h>
@interface PaddleMobileCPUResult: NSObject
@property (assign, nonatomic, readonly) float *output;
@property (assign, nonatomic, readonly) int outputSize;
-(void)releaseOutput;
@end
@interface PaddleMobileCPU : NSObject
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
加载散开形式的模型, 需传入模型的目录
*/
- (BOOL)load:(NSString *)modelAndWeightPath;
/*
* 从内存中加载模型
* */
- (BOOL)LoadCombinedMemory:(size_t)modelLen
andModelBuf:(const uint8_t *)modelBuf
andModelParamsLen:(size_t)combinedParamsLen
andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf;
/*
* 对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存
* */
-(void)preprocess:(CGImageRef)image
output:(float *)output
means:(NSArray<NSNumber *> *)means
scale:(float)scale
dim:(NSArray<NSNumber *> *)dim;
/*
* 预测预处理后的数据, 返回结果使用结束需要调用其 realseOutput 函数进行释放
* */
- (PaddleMobileCPUResult *)predictInput:(float *)input
dim:(NSArray<NSNumber *> *)dim;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测, 默认 means 为 0, scale 为 1.0
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
@end
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#import <Foundation/Foundation.h>
@interface CPUResult: NSObject
@property (assign, nonatomic) float *output;
@property (assign, nonatomic) int outputSize;
@end
@interface NMSCompute: NSObject
@property (assign, nonatomic) float scoreThredshold;
@property (assign, nonatomic) int nmsTopK;
@property (assign, nonatomic) int keepTopK;
@property (assign, nonatomic) float nmsEta;
@property (assign, nonatomic) float nmsThreshold;
@property (assign, nonatomic) int background_label;
@property (strong, nonatomic) NSArray<NSNumber *> *scoreDim;
@property (strong, nonatomic) NSArray<NSNumber *> *bboxDim;
-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox;
@end
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import "CPUCompute.h"
#import <map>
#import <vector>
#import <utility>
#import <algorithm>
struct NMSParam {
float *score_data;
float *box_data;
float *output;
int output_size;
std::vector<int> score_dim;
std::vector<int> box_dim;
float scoreThredshold;
int nmsTopK;
int keepTopK;
float nmsEta;
float nmsThreshold;
int background_label;
};
constexpr int kOutputDim = 6;
constexpr int kBBoxSize = 4;
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
template <class T>
static inline void GetMaxScoreIndex(
const std::vector<T>& scores, const T threshold, int top_k,
std::vector<std::pair<T, int>>* sorted_indices) {
for (size_t i = 0; i < scores.size(); ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
template <class T>
static inline T BBoxArea(const T* box, const bool normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return static_cast<T>(0.);
} else {
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
template <class T>
static inline T JaccardOverlap(const T* box1, const T* box2,
const bool normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return static_cast<T>(0.);
} else {
const T inter_xmin = std::max(box1[0], box2[0]);
const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]);
const T inter_w = inter_xmax - inter_xmin;
const T inter_h = inter_ymax - inter_ymin;
const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <typename T>
static inline void NMSFast(
const T *bbox_data,
std::vector<int> bbox_dim,
const T *score_data,
const T score_threshold, const T nms_threshold,
const T eta, const int top_k,
std::vector<int>* selected_indices) {
// The total boxes for each instance.
int num_boxes = bbox_dim[0];
// 4: [xmin ymin xmax ymax]
int box_size = bbox_dim[1];
std::vector<T> scores_data(num_boxes);
std::copy_n(score_data, num_boxes, scores_data.begin());
std::vector<std::pair<T, int>> sorted_indices;
GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);
selected_indices->clear();
T adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (size_t k = 0; k < selected_indices->size(); ++k) {
if (keep) {
const int kept_idx = (*selected_indices)[k];
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, true);
keep = overlap <= adaptive_threshold;
} else {
break;
}
}
if (keep) {
selected_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= eta;
}
}
}
template <typename T>
void MultiClassNMS(const T *boxes_data,
const std::vector<int> &box_dim,
const T *scores_data,
const std::vector<int> &score_dim,
std::map<int, std::vector<int>>* indices, int* num_nmsed_out,
const int& background_label, const int& nms_top_k,
const int& keep_top_k, const T& nms_threshold,
const T& nms_eta, const T& score_threshold) {
int64_t class_num = score_dim[0];
int64_t predict_dim = score_dim[1];
int num_det = 0;
for (int c = 0; c < class_num; ++c) {
if (c == background_label) continue;
const T *score_data = scores_data + c * predict_dim;
/// [c] is key
NMSFast<T>(boxes_data, box_dim, score_data, score_threshold, nms_threshold, nms_eta,
nms_top_k, &((*indices)[c]));
num_det += (*indices)[c].size();
}
*num_nmsed_out = num_det;
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<T, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *indices) {
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
// PADDLE_ENFORCE_LT(idx, predict_dim);
score_index_pairs.push_back(std::make_pair(sdata[idx], std::make_pair(label, idx)));
}
}
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*indices);
*num_nmsed_out = keep_top_k;
}
}
template <typename T>
void MultiClassOutput(const T *scores_data,
const std::vector<int> &score_dim,
const T *bboxes_data,
T *outputs_data,
const std::map<int, std::vector<int>>& selected_indices) {
int predict_dim = score_dim[1];
int count = 0;
for (const auto& it : selected_indices) {
/// one batch
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& indices = it.second;
for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j];
const T* bdata = bboxes_data + idx * kBBoxSize;
outputs_data[count * kOutputDim] = label; // label
outputs_data[count * kOutputDim + 1] = sdata[idx]; // score
// xmin, ymin, xmax, ymax
std::memcpy(outputs_data + count * kOutputDim + 2, bdata, 4 * sizeof(T));
count++;
}
}
}
void MultiClassNMSCompute(NMSParam *param) {
assert(param->score_dim[0] == 1);
assert(param->box_dim[0] == 1);
assert (param->score_dim.size() == 3);
assert(param->box_dim.size() == 3);
float* outputs;
auto background_label = param->background_label;
auto nms_top_k = param->nmsTopK;
auto keep_top_k = param->keepTopK;
auto nms_threshold = param->nmsThreshold;
auto nms_eta = param->nmsEta;
auto score_threshold = param->scoreThredshold;
std::vector<int> score_dim_one_batch = {param->score_dim[1], param->score_dim[2]};
std::vector<int> box_dim_one_batch = {param->box_dim[1], param->box_dim[2]};
std::vector<int> batch_starts = {0};
std::map<int, std::vector<int>> indices;
int num_nmsed_out = 0;
MultiClassNMS<float>(param->box_data, box_dim_one_batch, param->score_data, score_dim_one_batch, &indices, &num_nmsed_out,
background_label, nms_top_k, keep_top_k, nms_threshold,
nms_eta, score_threshold);
batch_starts.push_back(batch_starts.back() + num_nmsed_out);
int output_size = 0;
int num_kept = batch_starts.back();
if (num_kept == 0) {
outputs = new float[1];
outputs[0] = -1;
output_size = 1;
} else {
outputs = new float[num_kept * kOutputDim];
int64_t s = batch_starts[0];
int64_t e = batch_starts[1];
if (e > s) {
MultiClassOutput<float>(param->score_data, score_dim_one_batch, param->box_data, outputs, indices);
}
output_size = num_kept * kOutputDim;
}
param->output = outputs;
param->output_size = output_size;
}
@implementation CPUResult
@end
@implementation NMSCompute
-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox {
NMSParam param;
param.box_data = bbox;
param.score_data = score;
param.background_label = self.background_label;
param.scoreThredshold = self.scoreThredshold;
param.nmsTopK = self.nmsTopK;
param.keepTopK = self.keepTopK;
param.nmsEta = self.nmsEta;
param.nmsThreshold = self.nmsThreshold;
std::vector<int> score_dim;
for (int i = 0; i < self.scoreDim.count; ++i) {
score_dim.push_back(self.scoreDim[i].intValue);
}
param.score_dim = score_dim;
std::vector<int> box_dim;
for (int i = 0; i < self.bboxDim.count; ++i) {
box_dim.push_back(self.bboxDim[i].intValue);
}
param.box_dim = box_dim;
MultiClassNMSCompute(&param);
CPUResult *cr = [[CPUResult alloc] init];
cr.output = param.output;
cr.outputSize = param.output_size;
return cr;
}
@end
...@@ -16,95 +16,110 @@ import Foundation ...@@ -16,95 +16,110 @@ import Foundation
// 自定义 ?! 如果 ?! 前的返回值为一个可选值, 则进行隐式解包, 如果有值则返回这个值, 如果为nil 则fatalError 传入的信息 // 自定义 ?! 如果 ?! 前的返回值为一个可选值, 则进行隐式解包, 如果有值则返回这个值, 如果为nil 则fatalError 传入的信息
precedencegroup ExecutedOrFatalError{ precedencegroup ExecutedOrFatalError{
associativity: left associativity: left
higherThan: AssignmentPrecedence higherThan: AssignmentPrecedence
} }
infix operator ?!: ExecutedOrFatalError infix operator ?!: ExecutedOrFatalError
public func ?!<T>(option: T?, excuteOrError: @autoclosure () -> String) -> T{ public func ?!<T>(option: T?, excuteOrError: @autoclosure () -> String) -> T{
if let inOpt = option { if let inOpt = option {
return inOpt return inOpt
}else{ }else{
print(excuteOrError()) print(excuteOrError())
fatalError(excuteOrError()) fatalError(excuteOrError())
} }
} }
//Lense //Lense
struct Lense<A, B> { struct Lense<A, B> {
let from: (A) -> B let from: (A) -> B
let to: (B, A) -> A let to: (B, A) -> A
} }
precedencegroup CombineLense{ precedencegroup CombineLense{
associativity: left associativity: left
higherThan: AssignmentPrecedence higherThan: AssignmentPrecedence
} }
infix operator >>>: CombineLense infix operator >>>: CombineLense
func >>><A, B, C>(left: Lense<B, C>, right: Lense<A, B>) -> Lense<A, C> { func >>><A, B, C>(left: Lense<B, C>, right: Lense<A, B>) -> Lense<A, C> {
return Lense<A, C>.init(from: { (a) -> C in return Lense<A, C>.init(from: { (a) -> C in
left.from(right.from(a)) left.from(right.from(a))
}, to: { (c, a) -> A in }, to: { (c, a) -> A in
right.to( left.to(c, right.from(a)),a) right.to( left.to(c, right.from(a)),a)
}) })
} }
protocol CIntIndex { protocol CIntIndex {
associatedtype T; associatedtype T;
subscript(index: CInt) -> T { get set}; subscript(index: CInt) -> T { get set};
} }
extension Array: CIntIndex{ extension Array: CIntIndex{
typealias T = Element typealias T = Element
subscript(index: CInt) -> T { subscript(index: CInt) -> T {
get{ get{
guard Int64(Int.max) >= Int64(index) else{ guard Int64(Int.max) >= Int64(index) else{
fatalError("cint index out of Int range") fatalError("cint index out of Int range")
} }
return self[Int(index)] return self[Int(index)]
} }
set{ set{
guard Int64(Int.max) >= Int64(index) else{ guard Int64(Int.max) >= Int64(index) else{
fatalError("cint index out of Int range") fatalError("cint index out of Int range")
} }
self[Int(index)] = newValue self[Int(index)] = newValue
}
} }
}
} }
extension Array where Element: AnyObject{ extension Array where Element: AnyObject{
mutating func remove(element: Element) { mutating func remove(element: Element) {
if let index = index(where: { (node) -> Bool in if let index = index(where: { (node) -> Bool in
return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self) return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self)
}) { }) {
remove(at: index) remove(at: index)
}
} }
}
} }
//MARK: Array extension //MARK: Array extension
extension Array where Element: Comparable{ extension Array where Element: Comparable{
/// 返回数组前 r 个元素, 并将元素处于原数组的位置作为元组的第一个元素返回 /// 返回数组前 r 个元素, 并将元素处于原数组的位置作为元组的第一个元素返回
/// ///
/// - Parameter r: 前 r 个元素 /// - Parameter r: 前 r 个元素
/// - Returns: [(原有位置, 排好位置的元素)] /// - Returns: [(原有位置, 排好位置的元素)]
public func top(r: Int) -> [(Int, Element)] { public func top(r: Int) -> [(Int, Element)] {
precondition(r <= self.count) precondition(r <= self.count)
return Array<(Int, Element)>(zip(0..<self.count, self).sorted{ $0.1 > $1.1 }.prefix(through: r - 1)) return Array<(Int, Element)>(zip(0..<self.count, self).sorted{ $0.1 > $1.1 }.prefix(through: r - 1))
}
}
extension Array {
public func strideArray(inCount: Int = 20) -> [(Int, Element)] {
if count < inCount {
return (0..<count).map{ ($0, self[$0]) }
} else {
let stride = count / inCount
var newArray: [(Int, Element)] = []
for i in 0..<inCount {
newArray.append((i * stride, self[i * stride]))
}
return newArray
} }
}
} }
extension String{ extension String{
func cStr() -> UnsafePointer<Int8>? { func cStr() -> UnsafePointer<Int8>? {
return (self as NSString).utf8String return (self as NSString).utf8String
} }
} }
func address<T: AnyObject>(o: T) -> String { func address<T: AnyObject>(o: T) -> String {
return String.init(format: "%018p", unsafeBitCast(o, to: Int.self)) return String.init(format: "%018p", unsafeBitCast(o, to: Int.self))
} }
......
...@@ -18,263 +18,588 @@ fileprivate var defaultMetalLibrary: MTLLibrary? ...@@ -18,263 +18,588 @@ fileprivate var defaultMetalLibrary: MTLLibrary?
fileprivate var paddleMobileMetalLibrary: MTLLibrary? fileprivate var paddleMobileMetalLibrary: MTLLibrary?
extension MTLDevice { extension MTLDevice {
func defaultLibrary() -> MTLLibrary { func defaultLibrary() -> MTLLibrary {
if defaultMetalLibrary == nil { if defaultMetalLibrary == nil {
defaultMetalLibrary = makeDefaultLibrary() defaultMetalLibrary = makeDefaultLibrary()
} }
if let inDefaultLib = defaultMetalLibrary { if let inDefaultLib = defaultMetalLibrary {
return inDefaultLib return inDefaultLib
} else { } else {
fatalError(" default metal libary is nil") fatalError(" default metal libary is nil")
} }
}
func paddleMobileLibrary() -> MTLLibrary {
if paddleMobileMetalLibrary == nil {
guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else {
fatalError("Counld't find paddle mobile library")
}
do {
paddleMobileMetalLibrary = try makeLibrary(filepath: path)
} catch _ {
fatalError("Counld't load paddle mobile library")
}
} }
func paddleMobileLibrary() -> MTLLibrary { if let inPaddleMobileLib = paddleMobileMetalLibrary {
if paddleMobileMetalLibrary == nil { return inPaddleMobileLib
guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else { } else {
fatalError("Counld't find paddle mobile library") fatalError("PaddleMobile metal libary is nil")
} }
do { }
paddleMobileMetalLibrary = try makeLibrary(filepath: path)
} catch _ { func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState {
fatalError("Counld't load paddle mobile library") let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary()
} guard let function = useLib.makeFunction(name: funcName) else {
} fatalError(" function " + funcName + " not found")
}
if let inPaddleMobileLib = paddleMobileMetalLibrary { do {
return inPaddleMobileLib let pipLine = try makeComputePipelineState(function: function)
} else { return pipLine
fatalError("PaddleMobile metal libary is nil") } catch let error {
} print(error)
fatalError("make pip line error occured : \(error)")
} }
func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState { }
let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary()
guard let function = useLib.makeFunction(name: funcName) else { func makeBuffer<P>(value: [P]) -> MTLBuffer {
fatalError(" function " + funcName + " not found") let buffer = makeBuffer(length: value.count * MemoryLayout<P>.size, options: MTLResourceOptions.storageModeShared)
} let contents = buffer?.contents().bindMemory(to: P.self, capacity: value.count * MemoryLayout<P>.size)
do { for i in 0..<value.count {
let pipLine = try makeComputePipelineState(function: function) contents?[i] = value[i]
return pipLine }
} catch _ { return buffer!
fatalError("make pip line error occured") }
}
func texture2tensor_loop<P>(texture: MTLTexture, cb: ([Int], P)->Void) -> Void {
let bpR = texture.width * 4 * MemoryLayout<P>.size
let bpI = texture.height * bpR
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: 1))
for i in 0..<texture.arrayLength {
let pointer: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: bpI)
texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i)
for tx in 0..<texture.width * texture.height * 4 {
var k = tx
var xyzn: [Int] = [0, 0, 0, 0]
xyzn[1] = k / (texture.width * 4)
k %= (texture.width * 4)
xyzn[3] = k % 4
xyzn[0] = k / 4
xyzn[2] = i
cb(xyzn, pointer[tx])
}
} }
}
func texture2tensor_3<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
var tdim: [Int] = [1, 1, 1, 1]
for i in 0..<dim.count {
tdim[4 - dim.count + i] = dim[i]
}
let count = dim.reduce(1) { $0 * $1 }
var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
let ndim: [Int] = transpose.map { tdim[$0] }
assert(dim.count == 3)
assert(texture.width == ndim[3])
assert(texture.height == ndim[2])
assert(ndim[0] == 1)
assert(texture.arrayLength == (ndim[1] + 3) / 4)
texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
var tg: [Int] = [0, 0, 0, 0]
tg[1] = xyzn[2] * 4 + xyzn[3]
tg[2] = xyzn[1]
tg[3] = xyzn[0]
var ig: [Int] = [0, 0, 0, 0]
for k in 0..<4 {
ig[transpose[k]] = tg[k]
}
let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
if ix < count {
tensor[ix] = v
}
}
return tensor
}
func texture2tensor_2<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
var tdim: [Int] = [1, 1, 1, 1]
for i in 0..<dim.count {
tdim[4 - dim.count + i] = dim[i]
}
let count = dim.reduce(1) { $0 * $1 }
var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
let ndim: [Int] = transpose.map { tdim[$0] }
assert(dim.count == 2)
let w = (ndim[3] + 3) / 4
assert(texture.width == w)
assert(texture.height == ndim[2])
assert(ndim[0] == 1)
assert(ndim[1] == 1)
assert(texture.arrayLength == 1)
func makeBuffer<P>(value: [P]) -> MTLBuffer { texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
let buffer = makeBuffer(length: value.count * MemoryLayout<P>.size, options: MTLResourceOptions.storageModeShared) var tg: [Int] = [0, 0, 0, 0]
let contents = buffer?.contents().bindMemory(to: P.self, capacity: value.count * MemoryLayout<P>.size) tg[2] = xyzn[1]
for i in 0..<value.count { tg[3] = xyzn[0] * 4 + xyzn[3]
contents?[i] = value[i] var ig: [Int] = [0, 0, 0, 0]
} for k in 0..<4 {
return buffer! ig[transpose[k]] = tg[k]
}
let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
if ix < count {
tensor[ix] = v
}
}
return tensor
}
func texture2tensor_1<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
var tdim: [Int] = [1, 1, 1, 1]
for i in 0..<dim.count {
tdim[4 - dim.count + i] = dim[i]
} }
let count = dim.reduce(1) { $0 * $1 }
var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
let ndim: [Int] = transpose.map { tdim[$0] }
assert(dim.count == 1)
let w = (ndim[3] + 3) / 4
assert(texture.width == w)
assert(texture.height == 1)
assert(ndim[0] == 1)
assert(ndim[1] == 1)
assert(ndim[2] == 1)
assert(texture.arrayLength == 1)
func makeFloatTexture<P>(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{ texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
var tg: [Int] = [0, 0, 0, 0]
let textureDesc = MTLTextureDescriptor.init() tg[3] = xyzn[0] * 4 + xyzn[3]
textureDesc.width = textureWidth var ig: [Int] = [0, 0, 0, 0]
textureDesc.height = textureHeight for k in 0..<4 {
textureDesc.depth = 1 ig[transpose[k]] = tg[k]
textureDesc.usage = [.shaderRead, .shaderWrite] }
textureDesc.pixelFormat = .rgba32Float let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
textureDesc.textureType = .type2DArray if ix < count {
textureDesc.storageMode = .shared tensor[ix] = v
textureDesc.cpuCacheMode = .defaultCache }
textureDesc.arrayLength = arrayLength }
let texture = makeTexture(descriptor: textureDesc)! return tensor
}
if arrayLength == 1 && value.count >= 4{
let pointer: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: value.count * MemoryLayout<P>.size) func texture2tensor<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
for i in 0..<value.count { if dim.count == 3 {
pointer[i] = value[i] return texture2tensor_3(texture: texture, dim: dim, transpose: transpose)
} else if dim.count == 2 {
return texture2tensor_2(texture: texture, dim: dim, transpose: transpose)
} else if dim.count == 1 {
return texture2tensor_1(texture: texture, dim: dim, transpose: transpose)
}
var tdim: [Int] = [1, 1, 1, 1]
for i in 0..<dim.count {
tdim[4 - dim.count + i] = dim[i]
}
let count = dim.reduce(1) { $0 * $1 }
var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
let ndim: [Int] = transpose.map { tdim[$0] }
assert(texture.width == ndim[2])
assert(texture.height == ndim[1])
assert(texture.arrayLength == (ndim[0] * ndim[3] + 3) / 4)
texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
var tg: [Int] = [0, 0, 0, 0]
tg[1] = xyzn[1]
tg[2] = xyzn[0]
tg[0] = (xyzn[2] * 4 + xyzn[3]) / ndim[3]
tg[3] = (xyzn[2] * 4 + xyzn[3]) % ndim[3]
var ig: [Int] = [0, 0, 0, 0]
for k in 0..<4 {
ig[transpose[k]] = tg[k]
}
let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
if ix < count {
tensor[ix] = v
}
}
return tensor
}
func tensor2texture<P>(value: [P], dim: [Int], transpose: [Int] = [0, 1, 2, 3], inComputePrecision: ComputePrecision = .Float32) -> MTLTexture {
if value.count > 0 {
assert(value.count == dim.reduce(1) { $0 * $1 })
}
var tdim: [Int] = [1, 1, 1, 1]
for i in 0..<dim.count {
tdim[4 - dim.count + i] = dim[i]
}
let ndim: [Int] = transpose.map { tdim[$0] }
let textureDesc = MTLTextureDescriptor.init()
textureDesc.width = ndim[2]
textureDesc.height = ndim[1]
textureDesc.depth = 1
textureDesc.usage = [.shaderRead, .shaderWrite]
if inComputePrecision == .Float16 {
textureDesc.pixelFormat = .rgba16Float
} else if inComputePrecision == .Float32 {
textureDesc.pixelFormat = .rgba32Float
}
textureDesc.textureType = .type2DArray
textureDesc.storageMode = .shared
textureDesc.cpuCacheMode = .defaultCache
textureDesc.arrayLength = (ndim[0] * ndim[3] + 3) / 4
let texture = makeTexture(descriptor: textureDesc)!
if value.count > 0 {
var rcount: Int = (ndim[0] * ndim[3] + 3) / 4
rcount = rcount * 4 * ndim[1] * ndim[2]
var nvalue: [Float32] = .init(repeating: 0.0, count: rcount)
for i0 in 0..<tdim[0] {
for i1 in 0..<tdim[1] {
for i2 in 0..<tdim[2] {
for i3 in 0..<tdim[3] {
let ig = [i0, i1, i2, i3]
let ix = (i0 * tdim[1] * tdim[2] * tdim[3]) + (i1 * tdim[2] * tdim[3]) + (i2 * tdim[3]) + i3
let jg = transpose.map { ig[$0] }
let k = jg[0] * ndim[3] + jg[3]
let jx = ((k / 4) * ndim[1] * ndim[2] * 4) + (jg[1] * ndim[2] * 4) + (jg[2] * 4) + (k % 4)
nvalue[jx] = value[ix] as! Float32
} }
}
let bytesPerRow = texture.width * texture.depth * 4 * MemoryLayout<P>.size
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth))
texture.replace(region: region, mipmapLevel: 0, withBytes: pointer, bytesPerRow: bytesPerRow)
} else {
} }
}
return texture
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: ndim[2], height: ndim[1], depth: 1))
if inComputePrecision == .Float16 {
let xvalue: [UInt16] = .init(repeating: 0, count: rcount)
let pointer: UnsafeMutablePointer<Float32> = UnsafeMutablePointer(mutating: nvalue)
let outputP: UnsafeMutablePointer<UInt16> = UnsafeMutablePointer(mutating: xvalue)
float32ToFloat16(input: pointer, output: outputP, count: rcount)
let bpR = ndim[2] * 4 * 2
let bpI = ndim[1] * bpR
for i in 0..<textureDesc.arrayLength {
let p = outputP + texture.width * texture.height * 4 * i
texture.replace(region: region, mipmapLevel: 0, slice: i, withBytes: p, bytesPerRow: bpR, bytesPerImage: bpI)
}
} else {
let pointer: UnsafeMutablePointer<Float32> = UnsafeMutablePointer(mutating: nvalue)
let bpR = ndim[2] * 4 * MemoryLayout<P>.size
let bpI = ndim[1] * bpR
for i in 0..<textureDesc.arrayLength {
let p = pointer + texture.width * texture.height * 4 * i
texture.replace(region: region, mipmapLevel: 0, slice: i, withBytes: p, bytesPerRow: bpR, bytesPerImage: bpI)
}
}
} }
return texture
}
func makeFloatTexture<P>(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{
let textureDesc = MTLTextureDescriptor.init()
textureDesc.width = textureWidth
textureDesc.height = textureHeight
textureDesc.depth = 1
textureDesc.usage = [.shaderRead, .shaderWrite]
textureDesc.pixelFormat = .rgba32Float
textureDesc.textureType = .type2DArray
textureDesc.storageMode = .shared
textureDesc.cpuCacheMode = .defaultCache
textureDesc.arrayLength = arrayLength
let texture = makeTexture(descriptor: textureDesc)!
if value.count >= 4{
let counts = arrayLength * 4 * textureWidth * textureHeight
let pointer: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: counts * MemoryLayout<P>.size)
for i in 0..<value.count {
pointer[i] = value[i]
}
for i in value.count..<counts {
pointer[i] = 0 as! P
}
let bytesPerRow = texture.width * texture.depth * 4 * MemoryLayout<P>.size
let bytesPerImage = texture.height * bytesPerRow
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth))
for i in 0..<arrayLength {
let p = pointer + texture.width * texture.height * 4 * i
texture.replace(region: region, mipmapLevel: 0, slice: i, withBytes: p, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage)
}
} else {
}
return texture
}
} }
extension MTLComputeCommandEncoder { extension MTLComputeCommandEncoder {
func dispatch(computePipline: MTLComputePipelineState, outTexture: MTLTexture) { public func dispatch(computePipline: MTLComputePipelineState, outTexture: MTLTexture) {
let slices = (outTexture.arrayLength * 4 + 3)/4 let slices = (outTexture.arrayLength * 4 + 3)/4
let width = computePipline.threadExecutionWidth let width = computePipline.threadExecutionWidth
let height = computePipline.maxTotalThreadsPerThreadgroup/width let height = computePipline.maxTotalThreadsPerThreadgroup/width
let threadsPerGroup = MTLSize.init(width: width, height: height, depth: 1) let threadsPerGroup = MTLSize.init(width: width, height: height, depth: 1)
// print(" thread: threads per group: \(threadsPerGroup) ") // print(" thread: threads per group: \(threadsPerGroup) ")
// print(" thread: out texture width: \(outTexture.width) , out texture height: \(outTexture.height)") // print(" thread: out texture width: \(outTexture.width) , out texture height: \(outTexture.height)")
let groupWidth = (outTexture.width + width - 1)/width let groupWidth = (outTexture.width + width - 1)/width
let groupHeight = (outTexture.height + height - 1)/height let groupHeight = (outTexture.height + height - 1)/height
let groupDepth = slices let groupDepth = slices
let groups = MTLSize.init(width: groupWidth, height: groupHeight, depth: groupDepth) let groups = MTLSize.init(width: groupWidth, height: groupHeight, depth: groupDepth)
// print("groups: \(groups) ") setComputePipelineState(computePipline)
// print("threads per group: \(threadsPerGroup)")
dispatchThreadgroups(groups, threadsPerThreadgroup: threadsPerGroup)
setComputePipelineState(computePipline) }
dispatchThreadgroups(groups, threadsPerThreadgroup: threadsPerGroup)
}
} }
public extension MTLTexture { public extension MTLTexture {
func stridableFloatArray<P>(stridable: Bool = true) -> [(index: Int, value: P)] { func stridableFloatArray<P>(stridable: Bool = true) -> [(index: Int, value: P)] {
var arr: [P] = floatArray { (p: P) -> P in var arr: [P] = floatArray { (p: P) -> P in
return p; return p;
}
var result: [(index: Int, value: P)] = []
if arr.count > 100 && stridable {
for j in stride(from: 0, to: arr.count , by: arr.count / 100){
result.append((j, arr[j]))
}
} else {
for j in 0..<arr.count {
result.append((j, arr[j]))
}
}
return result
}
func floatArray<P, T>(res: (P) -> T) -> [T] {
var fArr: [T] = []
if textureType == .type2DArray {
for i in 0..<arrayLength{
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<P>.size, alignment: MemoryLayout<P>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<P>.size
let bytesPerImage = width * height * depth * 4 * MemoryLayout<P>.size
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
let p = bytes.assumingMemoryBound(to: P.self)
for j in 0..<width * height * depth * 4 {
fArr.append(res(p[j]))
} }
var result: [(index: Int, value: P)] = [] bytes.deallocate()
if arr.count > 100 && stridable { }
for j in stride(from: 0, to: arr.count , by: arr.count / 100){ } else if textureType == .type2D {
result.append((j, arr[j])) let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<P>.size, alignment: MemoryLayout<P>.alignment)
} let bytesPerRow = width * depth * 4 * MemoryLayout<P>.size
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
let p = bytes.assumingMemoryBound(to: P.self)
for j in 0..<width * height * 4 {
fArr.append(res(p[j]))
}
bytes.deallocate()
}
return fArr
}
func float32Array() -> [Float32] {
if pixelFormat == .rgba32Float {
let float32Array = floatArray { (f: Float32) -> Float32 in
return f
}
return float32Array
} else if pixelFormat == .rgba16Float {
var float16Array = floatArray { (f: Float16) -> Float16 in
return f
}
return float16To32(input: &float16Array, count: float16Array.count)
} else {
fatalError()
}
}
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
print(header)
print("texture: \(self)")
// let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable)
// print(res)
if textureType == .type2DArray {
for i in 0..<arrayLength{
var str: String = "slice: \(i): \n"
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
let p = bytes.assumingMemoryBound(to: T.self)
str += "2d array count : \(width * height * depth * 4) \n"
if stridable && width * height * depth * 4 > 20 {
for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 20){
str += " index \(j): \(p[j])"
}
} else { } else {
for j in 0..<arr.count { for j in 0..<width * height * depth * 4 {
result.append((j, arr[j])) str += " index \(j): \(p[j])"
}
}
bytes.deallocate()
print(str)
}
} else if textureType == .type2D {
var str: String = "texture 2D: "
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
let p = bytes.assumingMemoryBound(to: T.self)
str += "2d count : \(width * width * 4) \n"
if stridable {
for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 20){
str += "index \(j): \(p[j]) "
}
} else {
for j in 0..<width * height * 4 {
str += "index \(j): \(p[j]) "
}
}
print(str)
bytes.deallocate()
}
return nil
}
// n c h w - dim
func toTensor(dim: (n: Int, c: Int, h: Int, w: Int)) -> [Float32] {
var textureArray: [Float32]
if pixelFormat == .rgba32Float {
textureArray = floatArray { (i : Float32) -> Float32 in
return i
}
} else if pixelFormat == .rgba16Float {
var textureFloat16Array = floatArray { (i : Float16) -> Float16 in
return i
}
textureArray = float16To32(input: &textureFloat16Array, count: textureFloat16Array.count)
} else {
fatalError(" 目前还不支持其他类型 ")
}
var output: [Float32] = []
for s in 0..<arrayLength {
for c in 0..<4{
for h in 0..<dim.h {
for w in 0..<dim.w {
if (s * 4 + c) < dim.c {
let textureValue = textureArray[dim.w * dim.h * 4 * s + h * dim.w * 4 + w * 4 + c]
output.append(textureValue)
} }
}
} }
return result }
}
return output
}
func realNHWC(dim: (n: Int, h: Int, w: Int, c: Int)) -> [Float32] {
// print("origin dim: \(dim)")
// print("texture: ")
// print(self)
var textureArray: [Float32]
if pixelFormat == .rgba32Float {
textureArray = floatArray { (i : Float32) -> Float32 in
return i
}
} else if pixelFormat == .rgba16Float {
var textureFloat16Array = floatArray { (i : Float16) -> Float16 in
return i
}
textureArray = float16To32(input: &textureFloat16Array, count: textureFloat16Array.count)
} else {
fatalError(" 目前还不支持其他类型 ")
} }
func floatArray<P, T>(res: (P) -> T) -> [T] { var output: [Float32] = []
var fArr: [T] = [] let numOfASlice = dim.h * dim.w * 4
if textureType == .type2DArray { for h in 0..<dim.h {
for i in 0..<arrayLength{ for w in 0..<dim.w {
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<P>.size, alignment: MemoryLayout<P>.alignment) for sliceIndex in 0..<arrayLength {
let bytesPerRow = width * depth * 4 * MemoryLayout<P>.size if sliceIndex * 4 + 4 > dim.c {
let bytesPerImage = width * height * depth * 4 * MemoryLayout<P>.size for i in 0..<(4 - ((sliceIndex * 4 + 4) - dim.c)) {
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) let value = textureArray[sliceIndex * numOfASlice + h * dim.w * 4 + w * 4 + i]
getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) output.append(value)
let p = bytes.assumingMemoryBound(to: P.self)
for j in 0..<width * height * depth * 4 {
fArr.append(res(p[j]))
}
bytes.deallocate()
} }
} else if textureType == .type2D { } else {
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<P>.size, alignment: MemoryLayout<P>.alignment) for i in 0..<4 {
let bytesPerRow = width * depth * 4 * MemoryLayout<P>.size let value = textureArray[sliceIndex * numOfASlice + h * dim.w * 4 + w * 4 + i]
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) output.append(value)
getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
let p = bytes.assumingMemoryBound(to: P.self)
for j in 0..<width * height * 4 {
fArr.append(res(p[j]))
} }
bytes.deallocate() }
} }
return fArr }
} }
return output
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? { }
print(header)
print("texture: \(self)")
let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable)
print(res)
// if textureType == .type2DArray {
// for i in 0..<arrayLength{
// var str: String = "slice: \(i): \n"
// let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
// let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
// let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
// getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
// let p = bytes.assumingMemoryBound(to: T.self)
// str += "2d array count : \(width * height * depth * 4) \n"
// if stridable && width * height * depth * 4 > 100 {
// for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){
// str += " index \(j): \(p[j])"
// }
// } else {
// for j in 0..<width * height * depth * 4 {
// str += " index \(j): \(p[j])"
// }
// }
//
// bytes.deallocate()
// print(str)
// }
// } else if textureType == .type2D {
// var str: String = "texture 2D: "
// let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
// let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
// getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
// let p = bytes.assumingMemoryBound(to: T.self)
// str += "2d count : \(width * width * 4) \n"
//
// if stridable {
// for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){
// str += "index \(j): \(p[j]) "
// }
// } else {
// for j in 0..<width * height * 4 {
// str += "index \(j): \(p[j]) "
// }
// }
//
// print(str)
// bytes.deallocate()
// }
return nil
}
} }
public extension MTLBuffer { public extension MTLBuffer {
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? { func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
print(header) print(header)
print("MTLBuffer: \(self) ") print("MTLBuffer: \(self) ")
var str = "" var str = ""
if stridable && length/MemoryLayout<T>.stride > 1000{ if stridable && length/MemoryLayout<T>.stride > 1000{
for j in stride(from: 0, to: length, by: length/MemoryLayout<T>.stride / 100){ for j in stride(from: 0, to: length, by: length/MemoryLayout<T>.stride / 100){
str += " \(contents().assumingMemoryBound(to: T.self)[j])" str += " \(contents().assumingMemoryBound(to: T.self)[j])"
} }
} else { } else {
for i in 0..<length/MemoryLayout<T>.size { for i in 0..<length/MemoryLayout<T>.size {
str += " \(contents().assumingMemoryBound(to: T.self)[i])" str += " \(contents().assumingMemoryBound(to: T.self)[i])"
} }
}
print(str)
return nil
} }
print(str)
func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture { return nil
let textureDesc = MTLTextureDescriptor.init() }
textureDesc.width = textureWidth
textureDesc.height = textureHeight func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture {
textureDesc.depth = 1 let textureDesc = MTLTextureDescriptor.init()
textureDesc.usage = [.shaderRead, .shaderWrite] textureDesc.width = textureWidth
textureDesc.pixelFormat = .rgba32Float textureDesc.height = textureHeight
textureDesc.textureType = .type2DArray textureDesc.depth = 1
textureDesc.storageMode = .shared textureDesc.usage = [.shaderRead, .shaderWrite]
textureDesc.cpuCacheMode = .defaultCache textureDesc.pixelFormat = .rgba32Float
textureDesc.arrayLength = arrayLength textureDesc.textureType = .type2DArray
let texture = makeTexture(descriptor: textureDesc, offset: 0, bytesPerRow: textureWidth * 4 * 4)! textureDesc.storageMode = .shared
return texture textureDesc.cpuCacheMode = .defaultCache
textureDesc.arrayLength = arrayLength
let texture = makeTexture(descriptor: textureDesc, offset: 0, bytesPerRow: textureWidth * 4 * 4)!
return texture
}
func array<T>() -> [T] {
var array: [T] = []
let pointer = contents().bindMemory(to: T.self, capacity: length)
for i in 0..<(length / MemoryLayout<T>.size) {
array.append(pointer[i])
} }
return array;
}
} }
// /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// TestConvAddBatchNormRelu.swift
// paddle-mobile-demo Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Created by liuRuiLong on 2018/7/25. You may obtain a copy of the License at
// Copyright © 2018年 orange. All rights reserved.
// http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal import Metal
import Foundation import Foundation
...@@ -17,6 +23,204 @@ public class PaddleMobileUnitTest { ...@@ -17,6 +23,204 @@ public class PaddleMobileUnitTest {
queue = inQueue queue = inQueue
} }
private func indentPrintTensor(tensor: [Float32], dim: [Int], ix: [Int], indentLevel: Int) {
let indent = Array.init(repeating: " ", count: indentLevel).joined(separator: "")
var tx = ix
if dim.count == indentLevel + 1 {
var log: String = indent + "["
for i in 0..<dim[indentLevel] {
tx = ix
tx[indentLevel] = i
for x in 1..<dim.count {
for y in 0..<x {
tx[y] *= dim[x]
}
}
let c = tx.reduce(0) { $0 + $1 }
if i > 0 {
log += ", "
}
log += tensor[c].description
}
log += "]"
if (indentLevel > 0) && (ix[indentLevel - 1] < dim[indentLevel - 1] - 1) {
log += ","
}
print(log)
} else {
print(indent + "[")
for i in 0..<dim[indentLevel] {
tx[indentLevel] = i
indentPrintTensor(tensor: tensor, dim: dim, ix: tx, indentLevel: indentLevel + 1)
}
if (indentLevel > 0) && (ix[indentLevel - 1] < dim[indentLevel - 1] - 1) {
print(indent + "],")
} else {
print(indent + "]")
}
}
}
private func tensorPrint(tensor: [Float32], dim: [Int]) {
var detectPos = -1
var odim = 1
var ndim = dim
for i in 0..<dim.count {
if dim[i] == -1 {
if detectPos == -1 {
detectPos = i
} else {
detectPos = -2
}
} else if dim[i] <= 0 {
detectPos = -3
} else {
odim *= dim[i]
}
}
assert(detectPos >= -1)
if (detectPos == -1) {
assert(tensor.count == odim)
} else {
assert(tensor.count % odim == 0)
ndim[detectPos] = tensor.count / odim
}
indentPrintTensor(tensor: tensor, dim: ndim, ix: dim.map { $0 * 0 }, indentLevel: 0)
}
public func testConcat() {
// let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
// var it: [[Float32]] = []
// for _ in 0..<7 {
// it.append((0..<12).map { Float32($0) })
// }
// let input = it.map { device.tensor2texture(value: $0, dim: [3, 4]) }
// let output = device.tensor2texture(value: [Float32](), dim: [3, 28])
//
// let param = ConcatTestParam.init(
// input: input,
// output: output,
// dims: [[3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4]],
// axis: 1,
// odim: [3, 28]
// )
// let concatKernel = ConcatKernel<Float32>.init(device: device, testParam: param)
// concatKernel.test(cmdBuffer: buffer, param: param)
// buffer.addCompletedHandler { (buffer) in
// for i in 0..<it.count {
// let _: Float32? = input[i].logDesc()
// self.tensorPrint(tensor: it[i], dim: [3, 4])
// }
// let _: Float32? = output.logDesc()
// let tx: [Float32] = self.device.texture2tensor(texture: output, dim: [3, 28])
// self.tensorPrint(tensor: tx, dim: [3, 28])
// }
//
// buffer.commit()
}
public func testReshape() {
// let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
// let input: [Float32] = (0..<24).map { Float32($0) }
// let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
// let outTexture = device.tensor2texture(value: [Float32](), dim: [4, 6])
// let mp = ReshapeMetalParam.init(
// idim: (1, 2, 3, 4),
// itrans: (0, 1, 2, 3),
// odim: (1, 1, 4, 6),
// otrans: (0, 1, 2, 3)
// )
// let param = ReshapeTestParam.init(
// inputTexture: inTexture,
// outputTexture: outTexture,
// param: mp
// )
// let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param)
// reshapeKernel.test(commandBuffer: buffer, testParam: param)
// buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inTexture.logDesc()
// let _: Float32? = outTexture.logDesc()
// self.tensorPrint(tensor: input, dim: [2, 3, 4])
// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [4, 6])
// self.tensorPrint(tensor: tx, dim: [4, 6])
// }
// let input: [Float32] = (0..<24).map { Float32($0) }
// let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
// let outTexture = device.tensor2texture(value: [Float32](), dim: [24])
// let mp = ReshapeMetalParam.init(
// idim: (1, 2, 3, 4),
// itrans: (0, 1, 2, 3),
// odim: (1, 1, 1, 24),
// otrans: (0, 1, 2, 3)
// )
// let param = ReshapeTestParam.init(
// inputTexture: inTexture,
// outputTexture: outTexture,
// param: mp
// )
// let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param)
// reshapeKernel.test(commandBuffer: buffer, testParam: param)
// buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inTexture.logDesc()
// let _: Float32? = outTexture.logDesc()
// self.tensorPrint(tensor: input, dim: [2, 3, 4])
// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [24])
// self.tensorPrint(tensor: tx, dim: [24])
// }
//
//
// buffer.commit()
}
public func testTranspose() {
let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
// var input: [Float32] = []
// for i in 0..<72 {
// input.append(Float32(i))
// }
//// let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3)
// let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]);
// // group 1
// let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4])
// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0])
//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1])
//// // group 2
//// let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6)
//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1])
////
// let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
//
// transposeKernel.test(commandBuffer: buffer, param: param)
//
// buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
// let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
// self.tensorPrint(tensor: input, dim: [4, 3, 2, 3])
// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4])
// self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4])
// }
//
// let input: [Float32] = (0..<24).map { Float32($0) }
// let inputTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
// let outputTexture = device.tensor2texture(value: [Float](), dim: [3, 4, 2])
// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [0, 2, 3, 1])
// let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
//
// transposeKernel.test(commandBuffer: buffer, param: param)
//
// buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
// let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
// self.tensorPrint(tensor: input, dim: [2, 3, 4])
// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 4, 2])
// self.tensorPrint(tensor: tx, dim: [3, 4, 2])
// }
//
buffer.commit()
}
public func testConvAddBnRelu() { public func testConvAddBnRelu() {
let buffer = queue.makeCommandBuffer() ?! " buffer is nil " let buffer = queue.makeCommandBuffer() ?! " buffer is nil "
...@@ -116,7 +320,7 @@ public class PaddleMobileUnitTest { ...@@ -116,7 +320,7 @@ public class PaddleMobileUnitTest {
let offsetX = filterSize.width/2 - paddings.0 let offsetX = filterSize.width/2 - paddings.0
let offsetY = filterSize.height/2 - paddings.1 let offsetY = filterSize.height/2 - paddings.1
let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0)) let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), dilationX: UInt16(1), dilationY: UInt16(1))
let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize) let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize)
...@@ -132,16 +336,6 @@ public class PaddleMobileUnitTest { ...@@ -132,16 +336,6 @@ public class PaddleMobileUnitTest {
} }
buffer.commit() buffer.commit()
// let inputTexture = device.makeFloatTexture(value: <#T##[P]#>, textureWidth: <#T##Int#>, textureHeight: <#T##Int#>, arrayLength: <#T##Int#>)
// let param = ConvAddBatchNormReluTestParam.init(inInputTexture: <#T##MTLTexture#>, inOutputTexture: <#T##MTLTexture#>, inMetalParam: <#T##MetalConvParam#>, inFilterBuffer: <#T##MTLBuffer#>, inBiaseBuffer: <#T##MTLBuffer#>, inNewScaleBuffer: <#T##MTLBuffer#>, inNewBiaseBuffer: <#T##MTLBuffer#>, inFilterSize: <#T##(width: Int, height: Int, channel: Int)#>)
// ConvAddBatchNormReluKernel.init(device: <#T##MTLDevice#>, testParam: <#T##ConvAddBatchNormReluTestParam#>)
} }
} }
......
// /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Tools.swift
// paddle-mobile Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Created by liuRuiLong on 2018/7/26. You may obtain a copy of the License at
// Copyright © 2018年 orange. All rights reserved.
// http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation import Foundation
......
...@@ -13,80 +13,228 @@ ...@@ -13,80 +13,228 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import Accelerate
public protocol SummableMultipliable: Equatable { public protocol SummableMultipliable: Equatable {
static func +(lhs: Self, rhs: Self) -> Self static func +(lhs: Self, rhs: Self) -> Self
static func *(lhs: Self, rhs: Self) -> Self static func *(lhs: Self, rhs: Self) -> Self
static func -(lhs: Self, rhs: Self) -> Self static func -(lhs: Self, rhs: Self) -> Self
} }
public protocol PrecisionType: SummableMultipliable{ public protocol PrecisionType: SummableMultipliable{
init(inFloat: Float32) init(inFloat: Float32)
init(inFloat16: Float16) init(inFloat16: Float16)
init<P: PrecisionType>(_ inP: P) init<P: PrecisionType>(_ inP: P)
static var bitSize: UInt { get } static var bitSize: UInt { get }
} }
public typealias Float16 = Int16 public typealias Float16 = Int16
extension Float16: PrecisionType { extension Float16: PrecisionType {
public static func * (prefix: Float16, postfix: Float16) { public static func * (prefix: Float16, postfix: Float16) {
return prefix * postfix return prefix * postfix
}
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize {
self = Float16(inFloat: inP as! Float32)
} else if P.bitSize == Float16.bitSize {
self = inP as! Float16
} else {
fatalError()
} }
}
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize { public static var bitSize: UInt {
self = Float16(inFloat: inP as! Float32) return 16
} else if P.bitSize == Float16.bitSize { }
self = inP as! Float16
} else { public init(inFloat16: Float16) {
fatalError() self = inFloat16
}
public init(inFloat: Float32) {
self = Int16(inFloat)
}
}
extension Float32: PrecisionType {
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize {
self = inP as! Float32
} else if P.bitSize == Float16.bitSize {
self = Float32.init(inP as! Float16)
} else {
fatalError()
}
}
public init(inFloat: Float32) {
self = inFloat
}
public init(inFloat16: Float16) {
self = Float32.init(inFloat16)
}
public static var bitSize: UInt {
return 32
}
}
public func float32ToFloat16(input: UnsafeMutablePointer<Float32>, output: UnsafeMutableRawPointer, count: Int) {
var float32Buffer = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 4)
var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2)
guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else {
fatalError(" float 32 to float 16 error ! ")
}
}
public func float16To32(input: UnsafeMutablePointer<Float16>, count: Int) -> [Float32] {
var output = Array<Float>.init(repeating: 0.0, count: count)
float16to32(input: input, output: &output, count: count)
return output
}
public func float16to32(input: UnsafeMutablePointer<Float16>, output: UnsafeMutablePointer<Float32>, count: Int) {
var bufferFloat16 = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 2)
var bufferFloat32 = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 4)
if vImageConvert_Planar16FtoPlanarF(&bufferFloat16, &bufferFloat32, 0) != kvImageNoError {
fatalError(" convert float16 to float32 error")
}
}
// N - 0 C - 1 H - 2 W - 3
struct DataLayout {
static func NCHW(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout {
return DataLayout.init([(.N, dim[0]), (.C, dim[1]), (.H, dim[2]), (.W, dim[3])])
}
static func NHWC(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout {
return DataLayout.init([(.N, dim[0]), (.H, dim[1]), (.W, dim[2]), (.C, dim[3])])
}
func count() -> Int {
return layoutWithDim.count
}
var N: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .N {
return layoutDim.1
} }
}
return nil
} }
set {
public static var bitSize: UInt { var newN = (Layout.N, newValue)
return 16 if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .N
}) {
fatalError()
}
} }
}
public init(inFloat16: Float16) { var C: Int? {
self = inFloat16 get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .C {
return layoutDim.1
}
}
return nil
} }
public init(inFloat: Float32) { set {
self = Int16(inFloat) var newN = (Layout.C, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .N
}) {
fatalError()
}
} }
}
var H: Int? {
get {
} for layoutDim in layoutWithDim {
if layoutDim.0 == .H {
extension Float32: PrecisionType { return layoutDim.1
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize {
self = inP as! Float32
} else if P.bitSize == Float16.bitSize {
self = Float32.init(inP as! Float16)
} else {
fatalError()
} }
}
return nil
} }
set {
public init(inFloat: Float32) { var newN = (Layout.H, newValue)
self = inFloat if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .H
}) {
fatalError()
}
} }
}
public init(inFloat16: Float16) { var W: Int? {
self = Float32.init(inFloat16) get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .W {
return layoutDim.1
}
}
return nil
}
set {
var newN = (Layout.W, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .W
}) {
fatalError()
}
} }
}
init(_ inLayout: [(Layout, Int)]) {
layoutWithDim = inLayout
}
func layout() -> [Layout] {
return layoutWithDim.map({ (layout: Layout, dim: Int) -> Layout in
return layout
})
}
var layoutWithDim: [(Layout, Int)] = [(.N, 0), (.C, 0), (.H, 0), (.W, 0)]
func convertTo(inLayout: [Layout]) {
public static var bitSize: UInt { }
return 32
enum Layout: Int{
case N = 0
case C = 1
case H = 2
case W = 3
static func defaultLayout() -> [Layout] {
return [N, C, H, W]
} }
}
} }
public enum DataLayout { extension DataLayout: Equatable {
case NCHW public static func == (lhs: DataLayout, rhs: DataLayout) -> Bool {
case NHWC if lhs.layoutWithDim.count == rhs.layoutWithDim.count {
var result = true
for i in 0..<lhs.layoutWithDim.count {
result = (lhs.layoutWithDim[i].0 == rhs.layoutWithDim[i].0)
if !result {
break
}
}
return result
} else {
return false
}
}
} }
protocol Variant: CustomStringConvertible, CustomDebugStringConvertible { public protocol Variant: CustomStringConvertible, CustomDebugStringConvertible {
} }
extension Tensor: Variant { extension Tensor: Variant {
...@@ -95,12 +243,52 @@ extension Tensor: Variant { ...@@ -95,12 +243,52 @@ extension Tensor: Variant {
extension Texture: Variant { extension Texture: Variant {
} }
extension ResultHolder: Variant { extension GPUResultHolder: Variant {
} }
extension InputTexture: Variant { extension InputTexture: Variant {
} }
extension MTLTexture where Self: Variant { extension MTLTexture where Self: Variant {
} }
class FetchHolder: Variant {
var resultBuffer: MTLBuffer?
var dim: [Int]
var capacity: Int
init(inCapacity: Int, inDim: [Int]) {
capacity = inCapacity
dim = inDim
}
func initBuffer(device: MTLDevice) {
resultBuffer = device.makeBuffer(length: capacity * 4, options: [])
}
var result: UnsafeMutablePointer<Float32> {
guard let inResultBuffer = resultBuffer else {
fatalError()
}
return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: capacity)
}
}
extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
fatalError()
// return "\(result)"
}
var debugDescription: String {
fatalError()
// return "\(result)"
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
public class ResultHolder<P: PrecisionType> {
public let dim: [Int]
public let resultArr: [P]
public let elapsedTime: Double
public init(inDim: [Int], inResult: [P], inElapsedTime: Double) {
dim = inDim
resultArr = inResult
elapsedTime = inElapsedTime
}
}
extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
public var debugDescription: String {
var str = ""
str += "Dim: \(dim) \n value:[ "
if resultArr.count < 20 {
for d in resultArr {
str += " \(d) "
}
} else {
for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
str += " \(resultArr[d]) "
}
}
str += " ]"
return str
}
public var description: String {
return debugDescription
}
}
public class Executor<P: PrecisionType> {
var ops: [Runable & InferShaperable] = []
let program: Program
let device: MTLDevice
let queue: MTLCommandQueue
public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws {
program = inProgram
device = inDevice
queue = inQueue
for block in inProgram.programDesc.blocks {
//block.ops.count
for i in 0..<block.ops.count {
let op = block.ops[i]
do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
op.inferShape()
ops.append(op)
} catch let error {
throw error
}
}
// for op in block.ops {
// do {
// let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
// op.inferShape()
// ops.append(op)
// } catch let error {
// throw error
// }
// }
}
}
public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil) throws {
guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
}
let resInput: MTLTexture
if let inPre = preProcessKernle {
do {
try inPre.compute(inputTexuture: input, commandBuffer: buffer)
resInput = inPre.outputTexture
} catch let error {
throw error
}
} else {
resInput = input
}
let beforeDate = Date.init()
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: expect))
program.scope.setInput(input: inputTexture)
for op in ops {
do {
try op.run(device: device, buffer: buffer)
} catch let error {
throw error
}
}
buffer.addCompletedHandler { (commandbuffer) in
// let inputArr = resInput.floatArray(res: { (p:P) -> P in
// return p
// })
// print(inputArr)
// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray()
// print(stridableInput)
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
// for op in self.ops {
// op.delogOutput()
// }
// return
// self.ops[2].delogOutput()
let afterDate = Date.init()
guard let outputVar = self.program.scope.output() else {
fatalError("output nil")
}
guard let output = outputVar as? Texture<P> else {
fatalError("output var type error")
}
let resultHodlder = ResultHolder<P>.init(inDim: output.dim.dims, inResult: output.metalTexture.floatArray(res: { (p:P) -> P in
return p
}), inElapsedTime: afterDate.timeIntervalSince(beforeDate))
completionHandle(resultHodlder)
}
buffer.commit()
}
public func clear() {
program.scope.clear()
}
}
//public let paddle_executor: Executor = Executor.init()
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
public class Genet: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [128.0, 128.0, 128.0]
scale = 0.017
except = 0
modelPath = Bundle.main.path(forResource: "genet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 128, w: 128, c: 3)
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [128.0, 128.0, 128.0]
scale = 0.017
except = 0
modelPath = ""
paramPath = ""
modelDir = ""
preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 128, w: 128, c: 3)
}
class GenetPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false)
}
}
override public func resultStr(res: ResultHolder) -> String {
// fatalError()
return " \(res.result![0]) ... "
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import SwiftProtobuf
public class Loader<P: PrecisionType> {
class ParaLoader {
let file: UnsafeMutablePointer<FILE>
let fileSize: Int
var nowIndex: Int
init(paramPath: String) throws {
guard let tmpFile = fopen(paramPath, "rb") else {
throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
}
file = tmpFile
fseek(file, 0, SEEK_END)
fileSize = ftell(file)
guard fileSize > 0 else {
throw PaddleMobileError.loaderError(message: "param file size is too small")
}
rewind(file)
nowIndex = 0
}
func read(tensor: Tensor<P>) throws {
guard nowIndex <= fileSize else {
throw PaddleMobileError.loaderError(message: "out of the file range")
}
func pointerReader<T>(type: T.Type) -> T {
let ptr = UnsafeMutablePointer<T>.allocate(capacity: MemoryLayout<T>.size)
fread(ptr, 1, MemoryLayout<T>.size, file)
nowIndex += MemoryLayout<T>.size
let pointee = ptr.pointee
ptr.deinitialize(count: MemoryLayout<UInt32>.size)
ptr.deallocate()
return pointee
}
let _ = pointerReader(type: UInt32.self)
let lodLevel = pointerReader(type: UInt64.self)
for _ in 0..<lodLevel {
let size = pointerReader(type: UInt64.self)
for _ in 0..<Int(size/UInt64(MemoryLayout<size_t>.size)){
_ = pointerReader(type: size_t.self)
}
}
let _ = pointerReader(type: UInt32.self)
let tensorDescSize = pointerReader(type: Int32.self)
fseek(file, Int(tensorDescSize), SEEK_CUR)
nowIndex += Int(tensorDescSize)
/*
这里没有根据 Data Type 去判断, 而是从外部泛型直接指定了精度
*/
//现在模型传入模型为 Float 类型, 这块应该根据模型来
// let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
// let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
guard bytesRead == tensor.data.size else {
throw PaddleMobileError.loaderError(message: "param read size error")
}
// TODO: use script to convert
// let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
// for i in 0..<tensor.numel() {
// tensor.data[i] = P.init(inFloat: tmpPointer[i])
// }
// tmpPointer.deinitialize(count: tmpCapacity)
// tmpPointer.deallocate()
nowIndex += bytesRead
}
deinit {
fclose(file)
}
}
public init(){}
public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{
guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else {
throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !")
}
do {
let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
serializedData: modelData)
let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
print(programDesc)
guard let paraLoader = try? ParaLoader.init(paramPath: paraPath) else {
throw PaddleMobileError.loaderError(message: "load para error")
}
guard programDesc.blocks.count > 0 else {
throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0")
}
// to get feed key and fetch key
let block = programDesc.blocks[0]
guard let firstOp = block.ops.first, let lastOp = block.ops.last else {
throw PaddleMobileError.loaderError(message: "at least two operator")
}
guard firstOp.type == gFeedType, lastOp.type == gFetchType else {
throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch")
}
guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else {
throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found")
}
guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else {
throw PaddleMobileError.loaderError(message: "feed key or fetch key not found")
}
let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey)
// to load memory
for block in programDesc.blocks {
for varDesc in block.vars {
if (varDesc.type == .LodTensor) {
guard let tensorDesc = varDesc.tensorDesc else {
throw PaddleMobileError.loaderError(message: "get tensor desc failed")
}
// guard (try? tensorDesc.dataType.dataTypeSize()) == MemoryLayout<P>.size else {
// throw PaddleMobileError.memoryError(message: "PrecisionType not support")
// }
if (varDesc.persistable
&& varDesc.type != .FeedMiniBatch
&& varDesc.type != .FetchList) {
let dimArr = tensorDesc.dims
guard dimArr.count > 0 else {
throw PaddleMobileError.loaderError(message: "tensor desc dim size error")
}
let dim = Dim.init(inDim: dimArr)
let tensor = Tensor<P>.init(inDim: dim, inLayout: tensorDesc.dataLayout)
do {
try paraLoader.read(tensor: tensor)
} catch let error {
throw error
}
tensor.convert(to: .NHWC)
// tensor.initBuffer(device: device)
scope[varDesc.name] = tensor
} else {
let dim = Dim.init(inDim: tensorDesc.NHWCDim)
scope[varDesc.name] = Texture<P>.init(device: device, inDim: dim)
}
} else {
if varDesc.name == fetchKey {
scope[varDesc.name] = ResultHolder<P>.init(inDim: [], inResult: [], inElapsedTime: 0.0)
} else if varDesc.name == feedKey {
}
}
}
}
let program = Program.init(inProgramDesc: programDesc, inParamPath: paraPath, inScope: scope)
return program
} catch _ {
throw PaddleMobileError.loaderError(message: "protobuf decoder error")
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class MobileNet: Net{
class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, usePaddleMobileLib: false)
}
}
class PreWords {
var contents: [String] = []
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
}
}else{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String {
return contents[index]
}
}
let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: ResultHolder) -> String {
guard let resPointer = res.result else {
fatalError()
}
var s: [String] = []
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
}
override init(device: MTLDevice) {
super.init(device: device)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 0
modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetPreProccess.init(device: device)
dim = (n: 1, h: 224, w: 224, c: 3)
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
public class MobileNet_ssd_hand: Net{
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 2
modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 300, w: 300, c: 3)
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 2
modelPath = ""
paramPath = ""
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 300, w: 300, c: 3)
}
class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false)
}
}
override public func resultStr(res: ResultHolder) -> String {
return " \(res)"
}
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
// guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ")
// }
//
// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else {
// fatalError(" need score ")
// }
//
// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else {
// fatalError()
// }
//
// var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3]))
//// print("score: ")
//// print(scoreFormatArr.strideArray())
////
// var bboxArr = bbox.metalTexture.float32Array()
//// print("bbox: ")
//// print(bboxArr.strideArray())
//
// let nmsCompute = NMSCompute.init()
// nmsCompute.scoreThredshold = 0.01
// nmsCompute.nmsTopK = 400
// nmsCompute.keepTopK = 200
// nmsCompute.nmsEta = 1.0
// nmsCompute.nmsThreshold = 0.45
// nmsCompute.background_label = 0;
//
// nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])]
//
// nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])]
// guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else {
// fatalError( " result error " )
// }
//
// let output: [Float32] = result.map { $0.floatValue }
//
//
// return output
fatalError()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
public class MobileNet_ssd_AR: Net{
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [103.94, 116.78, 123.68]
scale = 1
except = 2
modelPath = Bundle.main.path(forResource: "ar_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 160, w: 160, c: 3)
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [103.94, 116.78, 123.68]
scale = 1
except = 2
modelPath = ""
paramPath = ""
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 160, w: 160, c: 3)
}
class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 160, inHeight: 160, inChannel: 3)
super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, usePaddleMobileLib: false)
}
}
override public func resultStr(res: ResultHolder) -> String {
return " \(res.result![0])"
}
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
guard let interRes = paddleMobileRes.intermediateResults else {
fatalError(" need have inter result ")
}
guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
fatalError(" need score ")
}
guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
fatalError()
}
// let startDate = Date.init()
// print("scoreFormatArr: ")
//print((0..<score.capacity).map{ score.result[$0] }.strideArray())
//
// print("bbox arr: ")
//
// print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
let nmsCompute = NMSCompute.init()
nmsCompute.scoreThredshold = 0.25
nmsCompute.nmsTopK = 100
nmsCompute.keepTopK = 100
nmsCompute.nmsEta = 1.0
nmsCompute.nmsThreshold = 0.449999988
nmsCompute.background_label = 0;
nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
fatalError( " result error " )
}
let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
// for i in 0..<Int(result.outputSize) {
//
// print("i \(i) : \(result.output[i])")
// }
// print(Date.init().timeIntervalSince(startDate))
// print(resultHolder.result![0])
return resultHolder
}
override func updateProgram(program: Program) {
for i in [56, 66, 76, 86, 93, 99] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
program.scope[output] = originTexture
if i == 99 {
opDesc.attrs["axis"] = 0
} else {
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
}
}
for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
}
for i in [60, 101, 90, 97, 70, 80] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
}
for i in [102] {
let opDesc = program.programDesc.blocks[0].ops[i]
for output in opDesc.outputs["Out"]! {
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
}
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
print(" split axis \(opDesc.attrs["axis"])")
}
// 99
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
public class ResultHolder: NSObject {
@objc public let result: UnsafeMutablePointer<Float32>?
@objc public let capacity: Int
init(inResult: UnsafeMutablePointer<Float32>?, inCapacity: Int) {
result = inResult
capacity = inCapacity
}
@objc public func releasePointer() {
result?.deinitialize(count: capacity)
result?.deallocate()
}
}
public class Net: NSObject {
var except: Int = 0
var means: [Float] = []
var scale: Float = 0.0
var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0)
var preprocessKernel: CusomKernel? = nil
var paramPointer: UnsafeMutableRawPointer? = nil
var paramSize: Int = 0
var modelPointer: UnsafeMutableRawPointer? = nil
var modelSize: Int = 0
var modelPath: String = ""
var paramPath: String = ""
var modelDir: String = ""
@objc public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
self.paramPointer = paramPointer
self.paramSize = paramSize
self.modelPointer = modePointer
self.modelSize = modelSize
super.init()
}
public func resultStr(res: ResultHolder) -> String {
fatalError()
}
func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
return ResultHolder.init(inResult: paddleMobileRes.resultPointer, inCapacity: paddleMobileRes.capacity)
}
@objc public init(device: MTLDevice) {
super.init()
}
func updateProgram(program: Program) {
}
}
...@@ -43,14 +43,31 @@ class OpCreator<P: PrecisionType> { ...@@ -43,14 +43,31 @@ class OpCreator<P: PrecisionType> {
[gConvType : ConvOp<P>.creat, [gConvType : ConvOp<P>.creat,
gBatchNormType : BatchNormOp<P>.creat, gBatchNormType : BatchNormOp<P>.creat,
gReluType : ReluOp<P>.creat, gReluType : ReluOp<P>.creat,
gElementwiseAdd : ElementwiseAddOp<P>.creat, gElementwiseAddType : ElementwiseAddOp<P>.creat,
gFeedType : FeedOp<P>.creat, gFeedType : FeedOp<P>.creat,
gFetchType : FetchOp<P>.creat, gFetchType : FetchOp<P>.creat,
gConvAddBatchNormReluType : ConvAddBatchNormReluOp<P>.creat, gConvAddBatchNormReluType : ConvAddBatchNormReluOp<P>.creat,
gPooType : PoolOp<P>.creat, gPooType : PoolOp<P>.creat,
gSoftmaxType : SoftmaxOp<P>.creat, gSoftmaxType : SoftmaxOp<P>.creat,
gReshapeType : ReshapeOp<P>.creat, gReshapeType : ReshapeOp<P>.creat,
gConvAddType : ConvAddOp<P>.creat] gConvAddType : ConvAddOp<P>.creat,
gDepthConvType : DepthConvOp<P>.creat,
gConcatType : ConcatOp<P>.creat,
gBoxcoderType : BoxcoderOp<P>.creat,
gConvBnReluType : ConvBNReluOp<P>.creat,
gDwConvBnReluType : DwConvBNReluOp<P>.creat,
gMulticlassNMSType : MulticlassNMSOp<P>.creat,
gTransposeType : TransposeOp<P>.creat,
gPriorBoxType : PriorBoxOp<P>.creat,
gPreluType : PreluOp<P>.creat,
gConv2dTransposeType : ConvTransposeOp<P>.creat,
gBilinearInterpType : BilinearInterpOp<P>.creat,
gSplit : SplitOp<P>.creat,
gShape : ShapeOp<P>.creat,
gFlatten : FlattenOp<P>.creat,
gConvAddPreluType : ConvAddPreluOp<P>.creat,
gConvAddAddPreluType : ConvAddAddPreluOp<P>.creat,
gElementwiseAddPreluType: ElementwiseAddPreluOp<P>.creat]
private init(){} private init(){}
} }
...@@ -22,147 +22,199 @@ import Foundation ...@@ -22,147 +22,199 @@ import Foundation
*/ */
protocol OpParam { protocol OpParam {
associatedtype OutputType: Variant associatedtype OutputType: Variant
var output: OutputType { get set } var output: OutputType { get set }
func outputDesc() -> String func outputDesc() -> String
associatedtype ParamPrecisionType: PrecisionType associatedtype ParamPrecisionType: PrecisionType
init(opDesc: OpDesc, inScope: Scope) throws init(opDesc: OpDesc, inScope: Scope) throws
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType
static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType
static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T static func inputImage<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func outputBoxes<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputVariances<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T
static func paramInputAlpha<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
} }
extension OpParam { extension OpParam {
func outputDesc() -> String { func outputDesc() -> String {
return output.debugDescription return output.debugDescription
}
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType {
guard let mapKeys = map[key], mapKeys.count > 0 else {
throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty")
} }
guard let variant = from[mapKeys[0]] else {
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType { throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope")
guard let mapKeys = map[key], mapKeys.count > 0 else {
throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty")
}
guard let variant = from[mapKeys[0]], let v = variant as? VarType else {
throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope")
}
return v
} }
static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType { guard let v = variant as? VarType else {
do { throw PaddleMobileError.paramError(message: " type error")
let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from)
return tensorX
} catch let error {
throw error
}
} }
return v
static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType { }
do {
let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) static func outputVariances<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
return tensorInput do {
} catch let error { let tensorVariances: VarType = try getFirstTensor(key: "Variances", map: outputs, from: from)
throw error return tensorVariances
} } catch let error {
throw error
} }
}
static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do { static func paramInputAlpha<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) do {
return tensorOutput let alphaTensor: VarType = try getFirstTensor(key: "Alpha", map: inputs, from: from)
} catch let error { return alphaTensor
throw error } catch let error {
} throw error
}
static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from)
return tensorOutputY
} catch let error {
throw error
}
}
static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from)
return tensorY
} catch let error {
throw error
}
} }
}
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) static func inputImage<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
return out do {
} catch let error { let tensorImage: VarType = try getFirstTensor(key: "Image", map: inputs, from: from)
throw error return tensorImage
} } catch let error {
} throw error
static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from)
return tensorFilter
} catch let error {
throw error
}
} }
}
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) do {
return tensorBias let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from)
} catch let error { return tensorX
throw error } catch let error {
} throw error
} }
}
static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func outputBoxes<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from) do {
return tensorMean let tensorBox: VarType = try getFirstTensor(key: "Boxes", map: outputs, from: from)
} catch let error { return tensorBox
throw error } catch let error {
} throw error
} }
}
static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) do {
return tensorScale let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from)
} catch let error { return tensorInput
throw error } catch let error {
} throw error
} }
}
static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) do {
return tensorVariance let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from)
} catch let error { return tensorOutput
throw error } catch let error {
} throw error
}
}
static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from)
return tensorOutputY
} catch let error {
throw error
}
}
static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from)
return tensorY
} catch let error {
throw error
}
}
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from)
return out
} catch let error {
throw error
}
}
static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from)
return tensorFilter
} catch let error {
throw error
}
}
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from)
return tensorBias
} catch let error {
throw error
}
}
static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from)
return tensorMean
} catch let error {
throw error
}
}
static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from)
return tensorScale
} catch let error {
throw error
}
}
static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from)
return tensorVariance
} catch let error {
throw error
}
}
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T{
guard let attr = attrs[key] else {
throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" )
} }
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T{ guard let tAttr = attr as? T else {
guard let attr = attrs[key] else { throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" )
throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" )
}
guard let tAttr = attr as? T else {
throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" )
}
return tAttr
} }
return tAttr
}
} }
...@@ -16,100 +16,118 @@ import Metal ...@@ -16,100 +16,118 @@ import Metal
import Foundation import Foundation
protocol Fusion { protocol Fusion {
static func fusionNode() -> Node static func fusionNode() -> Node
static func change() -> [String : [(from: String, to: String)]] static func change() -> [String : [(from: String, to: String)]]
static func fusionType() -> String static func fusionType() -> String
static func needCheck() -> [(Int, String)]
}
extension Fusion {
static func needCheck() -> [(Int, String)] {
return []
}
} }
protocol Runable { protocol Runable {
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws func run(device: MTLDevice, buffer: MTLCommandBuffer) throws
func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
func delogOutput() func delogOutput()
func inputVariant() -> [String : [Variant]]
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer)
} }
extension Runable where Self: OperatorProtocol{ extension Runable where Self: OperatorProtocol{
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws { func run(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do { do {
try runImpl(device: device, buffer: buffer) try runImpl(device: device, buffer: buffer)
} catch let error { } catch let error {
throw error throw error
}
// print(type + ": " + para.outputDesc())
} }
}
func inputVariant() -> [String : [Variant]] {
// return [:]
fatalError(" op \(type) need implement inputVariant")
}
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) {
fatalError(" need implement ")
}
func delogOutput() {
func delogOutput() { print(type + ": has no implementation" )
print(type + ": has no implementation" ) }
}
} }
protocol Creator where Self: OperatorProtocol{ protocol Creator where Self: OperatorProtocol{
associatedtype OpType: OperatorProtocol & Runable & InferShaperable associatedtype OpType: OperatorProtocol & Runable & InferShaperable
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType
} }
extension Creator where Self: OperatorProtocol { extension Creator where Self: OperatorProtocol {
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType { static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType {
do { do {
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope) return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope)
} catch let error { } catch let error {
throw error throw error
}
} }
}
} }
protocol InferShaperable { protocol InferShaperable {
func inferShape() func inferShape()
} }
protocol OperatorProtocol { protocol OperatorProtocol {
associatedtype ParamType associatedtype ParamType
associatedtype KerType: Computable where Self.KerType.ParamType == ParamType associatedtype KerType: Computable where Self.KerType.ParamType == ParamType
var type: String { get } var type: String { get }
var scope: Scope { get } var scope: Scope { get }
var inputs: [String : [String]] { get } var inputs: [String : [String]] { get }
var paraInputs: [String : [String]] { get set } var paraInputs: [String : [String]] { get set }
var outpus: [String : [String]] { get } var outpus: [String : [String]] { get }
var attrs: [String : Attr] { get } var attrs: [String : Attr] { get }
var para: ParamType { get } var para: ParamType { get }
var kernel: KerType { get } var kernel: KerType { get }
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws
} }
extension OperatorProtocol { extension OperatorProtocol {
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self { static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self {
do { do {
return try Self.init(device: device, opDesc: opDesc, inScope: inScope) return try Self.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error { } catch let error {
throw error throw error
}
} }
}
} }
class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType { class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
typealias ParamType = ParameterType typealias ParamType = ParameterType
typealias KerType = KernelType typealias KerType = KernelType
let type: String let type: String
let inputs: [String : [String]] let inputs: [String : [String]]
var paraInputs: [String : [String]] var paraInputs: [String : [String]]
let outpus: [String : [String]] let outpus: [String : [String]]
let attrs: [String : Attr] let attrs: [String : Attr]
let para: ParamType let para: ParamType
let scope: Scope let scope: Scope
var kernel: KerType var kernel: KerType
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
type = opDesc.type // print("create op: \(opDesc.type)")
scope = inScope type = opDesc.type
inputs = opDesc.inputs scope = inScope
outpus = opDesc.outputs inputs = opDesc.inputs
attrs = opDesc.attrs outpus = opDesc.outputs
paraInputs = opDesc.paraInputs attrs = opDesc.attrs
do { paraInputs = opDesc.paraInputs
para = try ParamType.init(opDesc:opDesc, inScope: inScope) do {
} catch let error { para = try ParamType.init(opDesc:opDesc, inScope: inScope)
throw error } catch let error {
} throw error
kernel = KernelType.init(device: device, param: para)
} }
kernel = KernelType.init(device: device, param: para)
}
} }
// op infos // op infos
...@@ -118,22 +136,57 @@ let gFeedType = "feed" ...@@ -118,22 +136,57 @@ let gFeedType = "feed"
let gConvType = "conv2d" let gConvType = "conv2d"
let gBatchNormType = "batch_norm" let gBatchNormType = "batch_norm"
let gReluType = "relu" let gReluType = "relu"
let gElementwiseAdd = "elementwise_add" let gElementwiseAddType = "elementwise_add"
let gConvAddBatchNormReluType = "conv_add_batchnorm_relu" let gConvAddBatchNormReluType = "conv_add_batchnorm_relu"
let gPooType = "pool2d" let gPooType = "pool2d"
let gSoftmaxType = "softmax" let gSoftmaxType = "softmax"
let gReshapeType = "reshape" let gReshapeType = "reshape"
let gConvAddType = "conv_add" let gConvAddType = "conv_add"
let gDepthConvType = "depthwise_conv2d"
let gPriorBoxType = "prior_box"
let gTransposeType = "transpose"
let gConcatType = "concat"
let gBoxcoderType = "box_coder"
let gMulticlassNMSType = "multiclass_nms"
let gConvBnReluType = "conv_bn_relu"
let gDwConvBnReluType = "depth_conv_bn_relu"
let gPreluType = "prelu"
let gConv2dTransposeType = "conv2d_transpose"
let gBilinearInterpType = "bilinear_interp"
let gSplit = "split"
let gShape = "shape"
let gFlatten = "flatten"
let gConvAddPreluType = "conv_add_prelu"
let gConvAddAddPreluType = "conv_add_add_prelu"
let gElementwiseAddPreluType = "elementwise_add_prelu"
let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]), let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]),
gBatchNormType : (inputs: ["X"], outputs: ["Y"]), gBatchNormType : (inputs: ["X"], outputs: ["Y"]),
gReluType : (inputs: ["X"], outputs: ["Out"]), gReluType : (inputs: ["X"], outputs: ["Out"]),
gElementwiseAdd : (inputs: ["X"], outputs: ["Out"]), gElementwiseAddType : (inputs: ["X"], outputs: ["Out"]),
gFeedType : (inputs: ["X"], outputs: ["Out"]), gFeedType : (inputs: ["X"], outputs: ["Out"]),
gFetchType : (inputs: ["X"], outputs: ["Out"]), gFetchType : (inputs: ["X"], outputs: ["Out"]),
gConvAddBatchNormReluType : (inputs: ["Input"], outputs: ["Out"]), gConvAddBatchNormReluType : (inputs: ["Input"], outputs: ["Out"]),
gPooType : (inputs: ["X"], outputs: ["Out"]), gPooType : (inputs: ["X"], outputs: ["Out"]),
gSoftmaxType : (inputs: ["X"], outputs: ["Out"]), gSoftmaxType : (inputs: ["X"], outputs: ["Out"]),
gReshapeType : (inputs: ["X"], outputs: ["Out"]), gReshapeType : (inputs: ["X"], outputs: ["Out"]),
gConvAddType : (inputs: ["Input"], outputs: ["Out"])] gConvAddType : (inputs: ["Input"], outputs: ["Out"]),
gDepthConvType : (inputs: ["Input"], outputs: ["Output"]),
gConcatType : (inputs: ["X"], outputs: ["Out"]),
gBoxcoderType : (inputs: ["PriorBox", "PriorBoxVar", "TargetBox"], outputs: ["OutputBox"]),
gTransposeType : (inputs: ["X"], outputs: ["Out"]),
gConvBnReluType : (inputs: ["Input"], outputs: ["Out"]),
gDwConvBnReluType : (inputs: ["Input"], outputs: ["Out"]),
gMulticlassNMSType : (inputs: ["BBoxes", "Scores"], outputs: ["Out"]),
gPriorBoxType : (inputs: ["Input", "Image"], outputs: ["Boxes", "Variances"]),
gPreluType : (inputs: ["X"], outputs: ["Out"]),
gConv2dTransposeType : (inputs: ["Input"], outputs: ["Output"]),
gBilinearInterpType : (inputs: ["X"], outputs: ["Out"]),
gSplit : (inputs: ["X"], outputs: ["Out"]),
gShape : (inputs: ["Input"], outputs: ["Out"]),
gFlatten : (inputs: ["X"], outputs: ["Out"]),
gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"]),
gConvAddAddPreluType : (inputs: ["Input"], outputs: ["Out"]),
gElementwiseAddPreluType : (inputs: ["X"], outputs: ["Out"])
]
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
// You may obtain a copy of the License at You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and See the License for the specific language governing permissions and
// limitations under the License. */ limitations under the License. */
import Foundation import Foundation
class BatchNormParam<P: PrecisionType>: OpParam { class BatchNormParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope) input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope) if input.transpose != [0, 2, 3, 1] {
inputBias = try BatchNormParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) fatalError("batch norm only accepts NHWC")
inputMean = try BatchNormParam.inputMean(inputs: opDesc.paraInputs, from: inScope) }
inputScale = try BatchNormParam.inputScale(inputs: opDesc.paraInputs, from: inScope) output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope)
inputVariance = try BatchNormParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) bias = try BatchNormParam.getFirstTensor(key: "Bias", map: opDesc.paraInputs, from: inScope)
epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs) mean = try BatchNormParam.getFirstTensor(key: "Mean", map: opDesc.paraInputs, from: inScope)
momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs) scale = try BatchNormParam.getFirstTensor(key: "Scale", map: opDesc.paraInputs, from: inScope)
is_test = try BatchNormParam.getAttr(key: "is_test", attrs: opDesc.attrs) variance = try BatchNormParam.getFirstTensor(key: "Variance", map: opDesc.paraInputs, from: inScope)
} catch let error { epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
throw error momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs)
} } catch let error {
throw error
} }
let input: Texture<P> }
var output: Texture<P> let input: Texture<P>
let inputBias: Tensor<ParamPrecisionType> var output: Texture<P>
let inputMean: Tensor<ParamPrecisionType> let bias: Tensor<P>
let inputScale: Tensor<ParamPrecisionType> let mean: Tensor<P>
let inputVariance: Tensor<ParamPrecisionType> let scale: Tensor<P>
let epsilon: Float let variance: Tensor<P>
let momentum: Float let epsilon: Float
let is_test: Bool let momentum: Float
} }
class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam<P>>, Runable, Creator, InferShaperable{ class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { typealias OpType = BatchNormOp<P>
para.output.dim = para.input.dim
} func inferShape() {
typealias OpType = BatchNormOp<P> para.output.dim = para.input.dim
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { }
do { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
try kernel.compute(commandBuffer: buffer, param: para) do {
} catch let error { try kernel.compute(commandBuffer: buffer, param: para)
throw error } catch let error {
} throw error
} }
}
func delogOutput() {
print(" \(type) output: ")
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class BilinearInterpParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope)
out_h = try BilinearInterpParam.getAttr(key: "out_h", attrs: opDesc.attrs)
out_w = try BilinearInterpParam.getAttr(key: "out_w", attrs: opDesc.attrs)
} catch let error {
throw error
}
if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) {
fatalError()
}
}
let input: Texture<P>
var output: Texture<P>
let out_h: Int
let out_w: Int
}
class BilinearInterpOp<P: PrecisionType>: Operator<BilinearInterpKernel<P>, BilinearInterpParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = BilinearInterpOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
// print(outputArray)
print(outputArray.strideArray())
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class BoxcoderParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope)
priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope)
targetBox = try BoxcoderParam.getFirstTensor(key: "TargetBox", map: opDesc.inputs, from: inScope)
output = try BoxcoderParam.getFirstTensor(key: "OutputBox", map: opDesc.outputs, from: inScope)
codeType = try BoxcoderParam.getAttr(key: "code_type", attrs: opDesc.attrs)
boxNormalized = try BoxcoderParam.getAttr(key: "box_normalized", attrs: opDesc.attrs)
} catch let error {
throw error
}
assert(priorBox.tensorDim.cout() == 2)
assert(priorBoxVar.tensorDim.cout() == 2)
assert(targetBox.tensorDim.cout() == 3)
assert(output.tensorDim.cout() == 3)
assert(priorBox.transpose == [0, 1, 2, 3])
assert(priorBoxVar.transpose == [0, 1, 2, 3])
assert(targetBox.transpose == [0, 1, 2, 3])
assert(codeType == "decode_center_size") // encode_center_size is not implemented
assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1)
}
let priorBox: Texture<P>
let priorBoxVar: Texture<P>
let targetBox: Texture<P>
var output: Texture<P>
let codeType: String
let boxNormalized: Bool
}
class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = BoxcoderOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
let device = para.output.metalTexture!.device
let pbv : [Float32] = device.texture2tensor(texture: para.priorBoxVar.metalTexture!, dim: para.priorBoxVar.tensorDim.dims, transpose: para.priorBoxVar.transpose)
let pb : [Float32] = device.texture2tensor(texture: para.priorBox.metalTexture!, dim: para.priorBox.tensorDim.dims, transpose: para.priorBox.transpose)
let tb : [Float32] = device.texture2tensor(texture: para.targetBox.metalTexture!, dim: para.targetBox.tensorDim.dims, transpose: para.targetBox.transpose)
let out : [Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(" prior box var ")
print(pbv.strideArray())
print(" target box ")
print(tb.strideArray())
print(" prior box ")
print(pb.strideArray())
print(" output ")
print(out.strideArray())
}
}
//
// CNNConvAddBatchNormReluOp.swift
// paddle-mobile
import Foundation
class CNNMPSConvTestParam: TestParam {
var outputTexture: MTLTexture?
var metalParam: MetalConvParam
let filterPointer: UnsafeMutableRawPointer
let biasePointer: UnsafeMutablePointer<Float>
let filterSize: (width: Int, height: Int, channel: Int)
init(inMetalParam: MetalConvParam, inFilter: [Float], inBiase: [Float], inFilterSize: (width: Int, height: Int, channel: Int)) {
metalParam = inMetalParam
filterPointer = UnsafeMutableRawPointer.init(mutating: inFilter)
biasePointer = UnsafeMutablePointer.init(mutating: inBiase)
filterSize = inFilterSize
}
}
@available(iOS 10.0, *)
class CNNMPSConvOp<P: PrecisionType>: Operator<CNNConvKernel<P>, CNNConvParam<P>>, Runable, Creator, InferShaperable, Fusion {
typealias OpType = CNNMPSConvOp<P>
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
fatalError()
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
}
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode-->Node.init(inType: gElementwiseAdd);
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gMPSCNNConvType
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConcatParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
guard let xlist = opDesc.inputs["X"] else {
fatalError()
}
for x in xlist {
guard let variant = inScope[x], let v = variant as? Texture<P> else {
fatalError()
}
if transpose.count == 0 {
transpose = v.transpose
}
if v.transpose != transpose {
fatalError()
}
input.append(v)
}
axis = try ConcatParam.getAttr(key: "axis", attrs: opDesc.attrs)
output = try ConcatParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
}
}
var input: [Texture<P>] = []
var output: Texture<P>
var transpose: [Int] = []
let axis: Int
}
class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = ConcatOp<P>
func inferShape() {
// let dim = para.input.reduce([0, 0]) {[$0[0] + $1.dim[0], $1.dim[1]]}
// para.output.dim = Dim.init(inDim: dim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs)
alpha = try ConvAddAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ConvAddAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
y = try ConvAddAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
}
}
let input: Texture<P>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let mode: String
let alpha: Tensor<P>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
}
class ConvAddAddPreluOp<P: PrecisionType>: Operator<ConvAddAddPreluKernel<P>, ConvAddAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddAddPreluOp<P>
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gConvAddAddPreluType
}
static func needCheck() -> [(Int, String)] {
return [(2, "Y"), (2, "X")]
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
}
...@@ -14,119 +14,117 @@ ...@@ -14,119 +14,117 @@
import Foundation import Foundation
class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam { class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope) filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope) input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope)
stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs) output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs) stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs)
dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs) paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs) dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope) scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
} catch let error { mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
throw error y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} } catch let error {
throw error
} }
}
let input: Texture<P>
let input: Texture<P>
let variance: Tensor<ParamPrecisionType>
let bias: Tensor<ParamPrecisionType> let variance: Tensor<ParamPrecisionType>
let mean: Tensor<ParamPrecisionType> let bias: Tensor<ParamPrecisionType>
let scale: Tensor<ParamPrecisionType> let mean: Tensor<ParamPrecisionType>
let y: Tensor<ParamPrecisionType> let scale: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType> let y: Tensor<ParamPrecisionType>
let epsilon: Float32 let filter: Tensor<ParamPrecisionType>
var newScale: MTLBuffer? let epsilon: Float32
var newBiase: MTLBuffer? var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
let stride: [Int32] var output: Texture<P>
let paddings: [Int32] let stride: [Int32]
let dilations: [Int32] let paddings: [Int32]
let groups: Int let dilations: [Int32]
let groups: Int
} }
class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKernel<P>, ConvAddBatchNormReluParam<P>>, Runable, Creator, InferShaperable, Fusion{ class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKernel<P>, ConvAddBatchNormReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddBatchNormReluOp<P>
typealias OpType = ConvAddBatchNormReluOp<P>
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func inferShape() { var outDim = [inDims[0]]
let inDims = para.input.dim for i in 0..<strides.count {
let filterDim = para.filter.dim let dilation: Int = Int(dilations[i])
let strides = para.stride let filterSize: Int = filterDim[i + 1]
let paddings = para.paddings let inputSize: Int = inDims[i + 1]
let dilations = para.dilations let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
var outDim = [inDims[0]] let dKernel = dilation * (filterSize - 1) + 1
for i in 0..<strides.count { let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
let dilation: Int = Int(dilations[i]) outDim.append(outputSize)
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
} }
outDim.append(filterDim[0])
static func fusionNode() -> Node { para.output.dim = Dim.init(inDim: outDim)
let beginNode = Node.init(inType: gConvType) }
_ = beginNode
--> Node.init(inType: gElementwiseAdd) func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
--> Node.init(inType: gBatchNormType) do {
--> Node.init(inType: gReluType) try kernel.compute(commandBuffer: buffer, param: para)
return beginNode } catch let error {
throw error
} }
}
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType)
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gConvAddBatchNormReluType
}
func delogOutput() {
print(" conv add batchnorm relu output ")
print(para.output.toTensor().strideArray())
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
static func change() -> [String : [(from: String, to: String)]] { // print("biase: \(para.y)")
return [:] // print("padding: \(para.paddings)")
} // print("stride: \(para.stride)")
static func fusionType() -> String { // let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
return gConvAddBatchNormReluType // let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
} // let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
func delogOutput() { // let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
}
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p
}
//
writeToLibrary(fileName: "output_112x112x32_2", array: output)
print(" write done")
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
}
} }
...@@ -15,79 +15,102 @@ ...@@ -15,79 +15,102 @@
import Foundation import Foundation
class ConvAddParam<P: PrecisionType>: OpParam { class ConvAddParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs) stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs) paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs) dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs) groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs)
y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error { y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
throw error } catch let error {
} throw error
} }
}
let input: Texture<P>
let y: Tensor<ParamPrecisionType> let input: Texture<P>
let filter: Tensor<ParamPrecisionType> let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
var output: Texture<P>
let stride: [Int32] var output: Texture<P>
let paddings: [Int32] let stride: [Int32]
let dilations: [Int32] let paddings: [Int32]
let groups: Int let dilations: [Int32]
let groups: Int
} }
class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, Runable, Creator, InferShaperable, Fusion{ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, Runable, Creator, InferShaperable, Fusion{
static func fusionNode() -> Node { typealias OpType = ConvAddOp<P>
let beginNode = Node.init(inType: gConvType)
_ = beginNode static func fusionNode() -> Node {
--> Node.init(inType: gElementwiseAdd) let beginNode = Node.init(inType: gConvType)
return beginNode _ = beginNode
} --> Node.init(inType: gElementwiseAddType)
return beginNode
static func change() -> [String : [(from: String, to: String)]] { }
return [:]
} static func change() -> [String : [(from: String, to: String)]] {
return [:]
static func fusionType() -> String { }
return gConvAddType
} static func fusionType() -> String {
return gConvAddType
}
func inferShape() {
typealias OpType = ConvAddOp<P> let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func inferShape() { var outDim = [inDims[0]]
let inDims = para.input.dim for i in 0..<strides.count {
let filterDim = para.filter.dim let dilation: Int = Int(dilations[i])
let strides = para.stride let filterSize: Int = filterDim[i + 1]
let paddings = para.paddings let inputSize: Int = inDims[i + 1]
let dilations = para.dilations let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
var outDim = [inDims[0]] let dKernel = dilation * (filterSize - 1) + 1
for i in 0..<strides.count { let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
let dilation: Int = Int(dilations[i]) outDim.append(outputSize)
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
} }
outDim.append(filterDim[0])
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { para.output.dim = Dim.init(inDim: outDim)
do { }
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
throw error do {
} try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
} }
}
func delogOutput() {
// print("op \(type): ")
// print(" padding: ")
// print(para.paddings)
// print("stride: ")
// print(para.stride)
// print("dilations: ")
// print(para.dilations)
// print(" para input dim: ")
// print(para.input.dim)
// print(" para filter dim: ")
// print(para.filter.dim)
// print(" para output dim: ")
// print(para.output.dim)
// print(" biase: ")
// let biase: [Float32] = para.y.buffer.array()
// print(biase)
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs)
alpha = try ConvAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ConvAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
y = try ConvAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
}
}
let input: Texture<P>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let mode: String
let alpha: Tensor<P>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
}
class ConvAddPreluOp<P: PrecisionType>: Operator<ConvAddPreluKernel<P>, ConvAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddPreluOp<P>
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gConvAddPreluType
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvBNReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvBNReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvBNReluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvBNReluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvBNReluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
epsilon = try ConvBNReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
groups = try ConvBNReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvBNReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
bias = try ConvBNReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvBNReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvBNReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
}
}
let input: Texture<P>
let variance: Tensor<ParamPrecisionType>
let bias: Tensor<ParamPrecisionType>
let mean: Tensor<ParamPrecisionType>
let scale: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let epsilon: Float32
var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
}
class ConvBNReluOp<P: PrecisionType>: Operator<ConvBNReluKernel<P>, ConvBNReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvBNReluOp<P>
func inputs() -> [Variant] {
return [para.input, para.variance, para.bias, para.mean, para.scale, para.filter]
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gConvBnReluType
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray())
}
}
...@@ -15,74 +15,67 @@ ...@@ -15,74 +15,67 @@
import Foundation import Foundation
class ConvParam<P: PrecisionType>: OpParam { class ConvParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvParam.output(outputs: opDesc.outputs, from: inScope) output = try ConvParam.output(outputs: opDesc.outputs, from: inScope)
stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs) stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs) groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs)
} catch let error { } catch let error {
throw error throw error
}
} }
}
let input: Texture<P>
let filter: Tensor<ParamPrecisionType> let input: Texture<P>
var output: Texture<P> let filter: Tensor<ParamPrecisionType>
let stride: [Int32] var output: Texture<P>
let paddings: [Int32] let stride: [Int32]
let dilations: [Int32] let paddings: [Int32]
let groups: Int let dilations: [Int32]
let groups: Int
} }
class ConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable { class ConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { typealias OpType = ConvOp<P>
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope) func inferShape() {
} catch let error { let inDims = para.input.dim
throw error let filterDim = para.filter.dim
} let strides = para.stride
let paddings = para.paddings
} let dilations = para.dilations
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
typealias OpType = ConvOp<P> var outDim = [inDims[0]]
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { for i in 0..<strides.count {
do { let dilation: Int = Int(dilations[i])
try kernel.compute(commandBuffer: buffer, param: para) let filterSize: Int = filterDim[i + 1]
} catch let error { let inputSize: Int = inDims[i + 1]
throw error let padding: Int = Int(paddings[i])
} let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
} }
outDim.append(filterDim[0])
func delogOutput() { para.output.dim = Dim.init(inDim: outDim)
print("conv output : ") }
print(para.output.metalTexture)
// let _: Float16? = para.output.metalTexture.logDesc() func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
} }
}
func delogOutput() {
print("conv output : ")
print(para.output.toTensor().strideArray())
// let _: Float16? = para.output.metalTexture.logDesc()
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvTransposeParam<P: PrecisionType>: ConvParam<P> {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
}
}
}
class ConvTransposeOp<P: PrecisionType>: Operator<ConvTransposeKernel<P>, ConvTransposeParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = ConvTransposeOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
let padToFourDim = para.output.padToFourDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
let output = para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3]))
print(output.strideArray())
} else {
print(" not implement")
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class DepthConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = DepthConvOp<P>
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
}
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray())
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class DwConvBNReluOp<P: PrecisionType>: Operator<ConvBNReluKernel<P>, ConvBNReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvBNReluOp<P>
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gDepthConvType)
_ = beginNode
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gDwConvBnReluType
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray())
}
}
...@@ -15,33 +15,80 @@ ...@@ -15,33 +15,80 @@
import Foundation import Foundation
class ElementwiseAddParam<P: PrecisionType>: OpParam { class ElementwiseAddParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs)
output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope) } catch let error {
axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs) throw error
} catch let error {
throw error
}
} }
let input: Texture<P> do {
let inputY: Tensor<P> inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
var output: Texture<P> } catch _ {
let axis: Int let tensorY: Tensor<P> = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision)
}
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
// if computePrecision == .Float32 {
// super.init(device: device, inFunctionName: "elementwise_add")
// } else if computePrecision == .Float16 {
// super.init(device: device, inFunctionName: "elementwise_add_half")
// } else {
// fatalError()
// }
// }
var offset = axis
if axis == -1 {
offset = inputX.tensorDim.cout() - inputY.tensorDim.cout()
}
for i in 0..<(inputY.tensorDim.cout()) {
assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i])
}
}
var inputX: Texture<P>
var inputY: Texture<P>
var output: Texture<P>
var axis: Int
} }
class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, ElementwiseAddParam<P>>, Runable, Creator, InferShaperable{ class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, ElementwiseAddParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = ElementwiseAddOp<P>
func inferShape() {
para.output.dim = para.input.dim func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
} }
}
func delogOutput() {
print(" \(type) output: ")
print(para.output)
typealias OpType = ElementwiseAddOp<P> let padToFourDim = para.output.padToFourDim
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { if para.output.transpose == [0, 1, 2, 3] {
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
} }
}
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
inputX = try ElementwiseAddPreluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ElementwiseAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try ElementwiseAddPreluParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error {
throw error
}
do {
inputY = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch _ {
let tensorY: Tensor<P> = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision)
}
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
// if computePrecision == .Float32 {
// super.init(device: device, inFunctionName: "elementwise_add")
// } else if computePrecision == .Float16 {
// super.init(device: device, inFunctionName: "elementwise_add_half")
// } else {
// fatalError()
// }
// }
var offset = axis
if axis == -1 {
offset = inputX.tensorDim.cout() - inputY.tensorDim.cout()
}
for i in 0..<(inputY.tensorDim.cout()) {
assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i])
}
}
let mode: String
let alpha: Tensor<P>
var inputX: Texture<P>
var inputY: Texture<P>
var output: Texture<P>
var axis: Int
}
class ElementwiseAddPreluOp<P: PrecisionType>: Operator<ElementwiseAddPreluKernel<P>, ElementwiseAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gElementwiseAddType)
_ = beginNode
--> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gElementwiseAddPreluType
}
typealias OpType = ElementwiseAddPreluOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output)
let padToFourDim = para.output.padToFourDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
}
}
...@@ -15,54 +15,53 @@ ...@@ -15,54 +15,53 @@
import Foundation import Foundation
class FeedParam<P: PrecisionType>: OpParam{ class FeedParam<P: PrecisionType>: OpParam{
var output: Texture<P> var output: Texture<P>
var input: InputTexture { var input: InputTexture {
return scope.input() as! InputTexture return scope.input() as! InputTexture
}
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
} }
let scope: Scope }
required init(opDesc: OpDesc, inScope: Scope) throws { typealias ParamPrecisionType = P
scope = inScope
do {
output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
}
}
typealias ParamPrecisionType = P
} }
class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<P>>, Runable, Creator, InferShaperable { class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = FeedOp<P> typealias OpType = FeedOp<P>
func inferShape() { func inferShape() {
// print("feed input: \(para.input.expectDim)") // print("feed input: \(para.input.expectDim)")
print("feed output: \(para.output.dim)") print("feed output: \(para.output.dim)")
// para.output.dim = // para.output.dim =
// para.output.dim = para.input.expectDim // para.output.dim = para.input.expectDim
} }
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do { do {
try kernel.compute(commandBuffer: buffer, param: para) try kernel.compute(commandBuffer: buffer, param: para)
} catch let error { } catch let error {
throw error throw error
}
// let resizeKernel = ResizeKernel<P>.init(device: device)
// let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim)
// do {
// try resizeKernel.compute(commandBuffer: buffer, param: resizeParam)
// } catch let error {
// throw error
// }
} }
func delogOutput() { // let resizeKernel = ResizeKernel<P>.init(device: device)
// para.input.mtlTexture.logDesc() // let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim)
// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true) // do {
// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false) // try resizeKernel.compute(commandBuffer: buffer, param: resizeParam)
} // } catch let error {
// throw error
// }
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray())
}
} }
...@@ -15,40 +15,73 @@ ...@@ -15,40 +15,73 @@
import Foundation import Foundation
class FetchParam<P: PrecisionType>: OpParam{ class FetchParam<P: PrecisionType>: OpParam{
var output: Texture<P> var output: FetchHolder
let input: Texture<P> let input: Texture<P>
let scope: Scope let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope scope = inScope
do { do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = input output = FetchHolder.init(inCapacity: input.numel(), inDim: input.tensorDim.dims)
} catch let error { scope.setOutput(output: output)
throw error } catch let error {
} throw error
} }
}
typealias ParamPrecisionType = P
typealias ParamPrecisionType = P
} }
class FetchKernel<P: PrecisionType>: Kernel, Computable { class FetchKernel<P: PrecisionType>: Kernel, Computable {
func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0)
required init(device: MTLDevice, param: FetchParam<P>) { encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0)
super.init(device: device, inFunctionName: "texture2d_to_2d_array") encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: FetchParam<P>) {
param.output.initBuffer(device: device)
if computePrecision == .Float16 {
if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch_half")
} else {
// fatalError(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder_half")
print(" not support ")
}
} else if computePrecision == .Float32 {
if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch")
} else {
print(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder")
// fatalError(" not support ")
}
} else {
fatalError(" not support ")
} }
}
} }
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable{ class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable {
func inferShape() {
print(para.input.dim) typealias OpType = FetchOp<P>
}
func inferShape() {
typealias OpType = FetchOp<P> print(para.input.dim)
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { }
scope.setOutput(output: para.output)
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
} }
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class FlattenParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try FlattenParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let axis: Int
}
class FlattenOp<P: PrecisionType>: Operator<FlattenKernel<P>, FlattenParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = FlattenOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import Foundation
public protocol TestParam {
}
public protocol Testable {
associatedtype TestParamType: TestParam
func test(commandBuffer: MTLCommandBuffer, param: TestParamType)
init(device: MTLDevice, testParam: TestParamType)
}
protocol Computable {
associatedtype ParamType: OpParam
func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws
init(device: MTLDevice, param: ParamType)
}
protocol KernelProtocol {
var pipline: MTLComputePipelineState { get set }
var functionName: String { get set }
}
open class Kernel {
let pipline: MTLComputePipelineState
let functionName: String
public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) {
pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib)
functionName = inFunctionName
}
}
open class CusomKernel: Kernel {
public struct Shape {
public let width: Int
public let height: Int
public let channel: Int
public init(inWidth: Int, inHeight: Int, inChannel: Int){
width = inWidth
height = inHeight
channel = inChannel
}
}
public let outputTexture: MTLTexture
public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) {
let textureDesc = MTLTextureDescriptor.init()
textureDesc.textureType = .type2D
textureDesc.width = outputDim.width
textureDesc.height = outputDim.height
textureDesc.depth = (outputDim.channel + 3) / 4
if computePrecision == .Float16 {
textureDesc.pixelFormat = .rgba16Float
} else if computePrecision == .Float32 {
textureDesc.pixelFormat = .rgba32Float
} else {
fatalError()
}
textureDesc.usage = [.shaderRead, .shaderWrite]
textureDesc.storageMode = .shared
outputTexture = device.makeTexture(descriptor: textureDesc) ?! " make texture error "
super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib)
}
public func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(inputTexuture, index: 0)
encoder.setTexture(outputTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: outputTexture)
encoder.endEncoding()
}
}
...@@ -15,53 +15,39 @@ ...@@ -15,53 +15,39 @@
import Foundation import Foundation
class BatchNormKernel<P: PrecisionType>: Kernel, Computable { class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
var newScale: MTLBuffer required init(device: MTLDevice, param: BatchNormParam<P>) {
var newBias: MTLBuffer let count = param.variance.dim.numel()
let varianceP = param.variance.data.pointer
required init(device: MTLDevice, param: BatchNormParam<P>) { let meanP = param.mean.data.pointer
guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else { let scaleP = param.scale.data.pointer
fatalError() let biasP = param.bias.data.pointer
} for i in 0..<count {
guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else { let invStd = P(1 / (Float32(varianceP[i]) + param.epsilon).squareRoot())
fatalError() biasP[i] = biasP[i] - meanP[i] * invStd * scaleP[i]
} scaleP[i] = invStd * scaleP[i]
self.newScale = newScale
self.newBias = newBias
super.init(device: device, inFunctionName: "batchnorm")
let varianceBuffer : MTLBuffer = param.inputVariance.buffer
var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length)
let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<(varianceBuffer.length / MemoryLayout<P>.stride) {
invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot()
}
let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self)
let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self)
let scale : MTLBuffer = param.inputScale.buffer
let scaleContents = scale.contents().assumingMemoryBound(to: P.self)
let bias : MTLBuffer = param.inputBias.buffer
let biasContents = bias.contents().assumingMemoryBound(to: P.self)
let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<(newScale.length / MemoryLayout<P>.stride) {
newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i]))
newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i]))
}
} }
func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam<P>) throws { param.bias.initBuffer(device: device, precision: computePrecision)
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { param.scale.initBuffer(device: device, precision: computePrecision)
throw PaddleMobileError.predictError(message: " encoder is nil") param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
} if computePrecision == .Float32 {
print("BatchNorm compute") super.init(device: device, inFunctionName: "batchnorm")
encoder.setTexture(param.input.metalTexture, index: 0) } else if computePrecision == .Float16 {
encoder.setTexture(param.output.metalTexture, index: 1) super.init(device: device, inFunctionName: "batchnorm_half")
encoder.setBuffer(newScale, offset: 0, index: 0) } else {
encoder.setBuffer(newBias, offset: 0, index: 1) fatalError()
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) }
encoder.endEncoding() }
func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBuffer(param.scale.buffer, offset: 0, index: 0)
encoder.setBuffer(param.bias.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
//
// BatchNormRelu.swift
// paddle-mobile
//
// Created by zhangxinjun on 2018/8/23.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
class BatchNormReluParam<P: PrecisionType>: BatchNormParam<P> {
}
class BatchNormReluKernel<P: PrecisionType>: Kernel, Computable{
typealias ParamType = BatchNormReluParam<P>
var newScale: MTLBuffer
var newBias: MTLBuffer
required init(device: MTLDevice, testParam: BatchNormReluTestParam) {
newScale = testParam.newScaleBuffer
newBias = testParam.newBiaseBuffer
super.init(device: device, inFunctionName: "batch_norm_relu_3x3")
}
required init(device: MTLDevice, param: BatchNormReluParam<P>) {
guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else {
fatalError()
}
guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else {
fatalError()
}
self.newScale = newScale
self.newBias = newBias
super.init(device: device, inFunctionName: "batch_norm_relu_3x3")
let varianceBuffer : MTLBuffer = param.inputVariance.buffer
var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length)
let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<(varianceBuffer.length / MemoryLayout<P>.stride) {
invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot()
}
let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self)
let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self)
let scale : MTLBuffer = param.inputScale.buffer
let scaleContents = scale.contents().assumingMemoryBound(to: P.self)
let bias : MTLBuffer = param.inputBias.buffer
let biasContents = bias.contents().assumingMemoryBound(to: P.self)
let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<(newScale.length / MemoryLayout<P>.stride) {
newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i]))
newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i]))
}
}
func compute(commandBuffer: MTLCommandBuffer, param: BatchNormReluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
fatalError()
}
encoder.setTexture(param.input as? MTLTexture, index: 0)
encoder.setTexture(param.output as? MTLTexture, index: 1)
encoder.setBuffer(newScale, offset: 0, index: 1)
encoder.setBuffer(newBias, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output as! MTLTexture)
encoder.endEncoding()
}
func testCompute(commandBuffer: MTLCommandBuffer, testParam: BatchNormReluTestParam) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
fatalError()
}
encoder.setTexture(testParam.inputTexture, index: 0)
encoder.setTexture(testParam.outputTexture, index: 1)
encoder.setBuffer(newScale, offset: 0, index: 0)
encoder.setBuffer(newBias, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct BilinearInterpMetalParam {
var ratio_h: Float32
var ratio_w: Float32
}
class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: BilinearInterpParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
var ratio_h: Float32 = 0
var ratio_w: Float32 = 0
if param.output.tensorDim.dims[2] > 1 {
ratio_h = Float32(param.input.tensorDim.dims[2]-1) / Float32(param.output.tensorDim.dims[2]-1)
}
if param.output.tensorDim.dims[3] > 1 {
ratio_w = Float32(param.input.tensorDim.dims[3]-1) / Float32(param.output.tensorDim.dims[3]-1)
}
var p = BilinearInterpMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w)
encoder.setBytes(&p, length: MemoryLayout<BilinearInterpMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: BilinearInterpParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "bilinear_interp_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "bilinear_interp_half")
} else {
fatalError()
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct BoxcoderMetalParam {
}
class BoxcoderKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: BoxcoderParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.priorBox.metalTexture, index: 0)
encoder.setTexture(param.priorBoxVar.metalTexture, index: 1)
encoder.setTexture(param.targetBox.metalTexture, index: 2)
encoder.setTexture(param.output.metalTexture, index: 3)
var bmp = BoxcoderMetalParam.init()
encoder.setBytes(&bmp, length: MemoryLayout<BoxcoderMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: BoxcoderParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2], computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "boxcoder_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "boxcoder_half")
} else {
fatalError()
}
}
}
//
// CNNConvKernel.swift
// paddle-mobile
//
import Foundation
import Metal
import Accelerate
import MetalPerformanceShaders
@available(iOS 10.0, *)
class WeightsDataSource: NSObject, MPSCNNConvolutionDataSource {
let desc: MPSCNNConvolutionDescriptor
let weight:UnsafeMutableRawPointer
let bias:UnsafeMutablePointer<Float>
init(inDesc: MPSCNNConvolutionDescriptor, inWeight: UnsafeMutableRawPointer, inBias: UnsafeMutablePointer<Float>) {
desc = inDesc
weight = inWeight
bias = inBias
}
func dataType() -> MPSDataType {
return .float32
}
func descriptor() -> MPSCNNConvolutionDescriptor {
return desc
}
func weights() -> UnsafeMutableRawPointer {
return self.weight
}
func biasTerms() -> UnsafeMutablePointer<Float>? {
return self.bias
}
func load() -> Bool {
return true
}
func purge() {
}
func label() -> String? {
return "Conv"
}
}
@available(iOS 10.0, *)
class CNNConvParam<P: PrecisionType>: OpParam{
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try CNNConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try CNNConvParam.input(inputs: opDesc.inputs, from: inScope)
output = try CNNConvParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try CNNConvParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try CNNConvParam.getAttr(key: "paddings", attrs: opDesc.attrs)
// 暂时不用关心
dilations = try CNNConvParam.getAttr(key: "dilations", attrs: opDesc.attrs)
// 暂时不用关心
groups = try CNNConvParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try CNNConvParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
// bias
y = try CNNConvParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
}
}
var input: Texture<P>
let variance: Tensor<ParamPrecisionType>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
}
@available(iOS 10.0, *)
class CNNConvKernel<P: PrecisionType>: Kernel, Computable {
typealias ParamType = CNNConvParam<P>
var mpsImageCreator: MpsImageCreator<P>?
var activation:MPSCNNNeuron?
var conv:MPSCNNConvolution?
var weightDataSource:WeightsDataSource?
var param: CNNConvParam<P>?
var device: MTLDevice?
required init(device:MTLDevice, testParam:CNNMPSConvTestParam) {
self.device = device
let desc = MPSCNNConvolutionDescriptor(kernelWidth: testParam.filterSize.width, kernelHeight: testParam.filterSize.height, inputFeatureChannels: testParam.filterSize.channel, outputFeatureChannels: testParam.filterSize.channel, neuronFilter: activation)
desc.strideInPixelsX = Int(testParam.metalParam.offsetX)
desc.strideInPixelsY = Int(testParam.metalParam.offsetY)
weightDataSource = WeightsDataSource(inDesc: desc, inWeight:testParam.filterPointer, inBias:testParam.biasePointer)
if #available(iOS 11.0, *) {
conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!)
} else {
// Fallback on earlier versions
}
super.init(device: device, inFunctionName: "")
}
required init(device:MTLDevice, param:CNNConvParam<P>) {
self.device = device
let inChannels: Int
let outChannels: Int
if param.y.dim.cout() == 4 {
inChannels = (param.y.dim[3])
outChannels = inChannels
} else {
inChannels = 0
outChannels = inChannels
}
let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.width, kernelHeight: param.filter.height, inputFeatureChannels: inChannels, outputFeatureChannels: outChannels, neuronFilter: activation)
desc.strideInPixelsX = Int(param.stride[0])
desc.strideInPixelsY = Int(param.stride[1])
weightDataSource = WeightsDataSource(inDesc: desc, inWeight:param.filter.data.pointer as! UnsafeMutablePointer<Float>, inBias: param.y.data.pointer as! UnsafeMutablePointer<Float>)
if #available(iOS 11.0, *) {
conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!)
} else {
// Fallback on earlier versions
}
super.init(device: device, inFunctionName: "")
}
func compute(commandBuffer: MTLCommandBuffer, param: CNNConvParam<P>) throws {
let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))!
var outputImage = (mpsImageCreator?.createMPSImage(device: device!))!
// 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用
conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage)
param.input = outputImage.texture as! Texture<P>
}
func testCompute(commandBuffer: MTLCommandBuffer, testParam: CNNMPSConvTestParam) throws {
let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))!
var outputImage = (mpsImageCreator?.createMPSImage(device: device!))!
// 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用
conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage)
testParam.outputTexture = outputImage.texture
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConcatKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: ConcatParam<P>) {
super.init(device: device, inFunctionName: "concat")
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct ConcatTestParam: TestParam {
var input: [MTLTexture]
var output: MTLTexture
var dims: [[Int]]
var axis: Int
var odim: [Int]
}
struct ConcatMetalParam {
var odim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1)
var axis: Int32 = 0
var offset: Int32 = 0
var trans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3)
var vdim: (Int32, Int32, Int32, Int32, Int32, Int32) = (0, 0, 0, 0, 0, 0)
}
class ConcatKernel<P: PrecisionType>: Kernel, Computable{
var v = "normal"
var pm = ConcatMetalParam.init()
func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
let num = param.input.count
for i in 0..<num {
encoder.setTexture(param.input[i].metalTexture, index: i)
}
encoder.setTexture(param.output.metalTexture, index: num)
if v == "normal" {
encoder.setTexture(param.output.metalTexture, index: num + 1)
}
encoder.setBytes(&pm, length: MemoryLayout<ConcatMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: ConcatParam<P>) {
param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: computePrecision)
let orank = param.output.tensorDim.cout()
let num = param.input.count
assert(num <= 6)
var axis = 4 - param.output.tensorDim.cout() + param.axis
for i in 0..<4 {
if param.transpose[i] == axis {
axis = i
break
}
}
pm.axis = Int32(axis)
pm.odim = (Int32(param.output.dim[0]), Int32(param.output.dim[1]), Int32(param.output.dim[2]), Int32(param.output.dim[3]))
pm.trans = (Int32(param.output.transpose[0]), Int32(param.output.transpose[1]), Int32(param.output.transpose[2]), Int32(param.output.transpose[3]))
var vdim: [Int] = [0, 0, 0, 0, 0, 0]
for i in 0..<num {
vdim[i] = param.input[i].dim[axis]
}
if orank == 4 {
if axis == 1 {
v = "y"
} else if axis == 2 {
v = "x"
} else {
if (param.output.dim[0] == 1) && axis == 3 {
var vz = true
for i in 0..<num {
if vdim[i] % 4 != 0 {
vz = false
break
}
}
if vz {
v = "z"
for i in 0..<num {
vdim[i] = vdim[i] / 4
}
}
}
}
} else if orank == 3 {
if axis == 2 {
v = "y"
} else if axis == 3 {
v = "x"
} else if axis == 1 {
var vz = true
for i in 0..<num {
if vdim[i] % 4 != 0 {
vz = false
break
}
}
if vz {
v = "z"
for i in 0..<num {
vdim[i] = vdim[i] / 4
}
}
}
} else {
if axis == 2 {
v = "y"
} else if axis == 3 {
var vx = true
for i in 0..<num {
if vdim[i] % 4 != 0 {
vx = false
break
}
}
if vx {
v = "x"
for i in 0..<num {
vdim[i] = vdim[i] / 4
}
}
}
}
pm.vdim = (Int32(vdim[0]), Int32(vdim[1]), Int32(vdim[2]), Int32(vdim[3]), Int32(vdim[4]), Int32(vdim[5]))
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_half")
} else {
fatalError()
}
}
required init(device: MTLDevice, testParam: ConcatTestParam) {
super.init(device: device, inFunctionName: "concat")
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half")
}
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float")
}
} else {
fatalError(" unsupport yet ")
}
} else {
fatalError()
}
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
// print(" function: \(functionName)")
// print("offset x: \(offsetX)")
// print("offset y: \(offsetY)")
let offsetZ = 0.0
let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
// print("metal param: ")
// print(inMetalParam)
metalParam = inMetalParam
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddAddPreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
...@@ -15,124 +15,165 @@ ...@@ -15,124 +15,165 @@
import Foundation import Foundation
struct ConvAddBatchNormReluTestParam: TestParam { struct ConvAddBatchNormReluTestParam: TestParam {
let inputTexture: MTLTexture let inputTexture: MTLTexture
let outputTexture: MTLTexture let outputTexture: MTLTexture
var metalParam: MetalConvParam var metalParam: MetalConvParam
let filterBuffer: MTLBuffer let filterBuffer: MTLBuffer
let biaseBuffer: MTLBuffer let biaseBuffer: MTLBuffer
let newScaleBuffer: MTLBuffer let newScaleBuffer: MTLBuffer
let newBiaseBuffer: MTLBuffer let newBiaseBuffer: MTLBuffer
let filterSize: (width: Int, height: Int, channel: Int) let filterSize: (width: Int, height: Int, channel: Int)
init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
inputTexture = inInputTexture inputTexture = inInputTexture
outputTexture = inOutputTexture outputTexture = inOutputTexture
metalParam = inMetalParam metalParam = inMetalParam
filterBuffer = inFilterBuffer filterBuffer = inFilterBuffer
biaseBuffer = inBiaseBuffer biaseBuffer = inBiaseBuffer
newScaleBuffer = inNewScaleBuffer newScaleBuffer = inNewScaleBuffer
newBiaseBuffer = inNewBiaseBuffer newBiaseBuffer = inNewBiaseBuffer
filterSize = inFilterSize filterSize = inFilterSize
} }
} }
class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable { class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) { required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) {
if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
} else if testParam.filterSize.channel == 1 { } else if testParam.filterSize.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
} else { } else {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
}
} }
}
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.variance.initBuffer(device: device, precision: .Float32)
param.mean.initBuffer(device: device, precision: .Float32)
param.scale.initBuffer(device: device, precision: .Float32)
param.bias.initBuffer(device: device, precision: .Float32)
var metalParam: MetalConvParam! if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) { super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
} else if param.filter.channel == 1 {
if param.filter.width == 1 && param.filter.height == 1 { super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") } else if param.filter.width == 3 && param.filter.height == 3 {
} else if param.filter.channel == 1 { super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") } else {
} else { fatalError(" unsupport ")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") }
} } else if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1_half")
param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) } else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3_half")
param.variance.initBuffer(device: device) } else if param.filter.width == 3 && param.filter.height == 3 {
param.mean.initBuffer(device: device) super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3_half")
param.scale.initBuffer(device: device) } else {
param.bias.initBuffer(device: device) fatalError(" unsupport ")
}
let offsetX = param.filter.width/2 - Int(param.paddings[0]) } else {
let offsetY = param.filter.height/2 - Int(param.paddings[1]) fatalError()
print("offset x: \(offsetX)")
print("offset y: \(offsetY)")
let offsetZ = 0.0
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
var invs: [P] = []
let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<param.variance.buffer.length/MemoryLayout<P>.stride {
let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
invs.append(P(inv))
}
let newScale: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: param.scale.buffer.length)
let newBiase: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: param.bias.buffer.length)
let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self)
let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self)
let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<param.scale.buffer.length/MemoryLayout<P>.stride {
newScale[i] = invs[i] * scaleContents[i]
newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i]
}
param.newBiase = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)
param.newScale = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)
newScale.deinitialize(count: param.scale.buffer.length)
newScale.deallocate()
newBiase.deinitialize(count: param.bias.buffer.length)
newBiase.deallocate()
} }
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam<P>) throws { let offsetX = param.filter.width/2 - Int(param.paddings[0])
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { let offsetY = param.filter.height/2 - Int(param.paddings[1])
throw PaddleMobileError.predictError(message: " encode is nil")
} print("offset x: \(offsetX)")
print("offset y: \(offsetY)")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) let offsetZ = 0.0
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0) metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2) var invs: [P] = []
encoder.setBuffer(param.newScale!, offset: 0, index: 3) let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
encoder.setBuffer(param.newBiase!, offset: 0, index: 4)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) for i in 0..<param.variance.buffer.length/MemoryLayout<P>.stride {
encoder.endEncoding() let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
invs.append(P(inv))
}
let newScale: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: param.scale.buffer.length)
let newBiase: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: param.bias.buffer.length)
let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self)
let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self)
let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<param.scale.buffer.length/MemoryLayout<P>.stride {
newScale[i] = invs[i] * scaleContents[i]
newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i]
} }
public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) { // var newScaleFP16: UnsafeMutableRawPointer
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { //
fatalError() // float32ToFloat16(input: newScale as! UnsafeMutablePointer<Float32>, output: newScaleFP16, count: param.scale.buffer.length / MemoryLayout<P>.size)
}
encoder.setTexture(param.inputTexture, index: 0) // let newBiaseFloat16 = device.makeBuffer(length: <#T##Int#>, options: <#T##MTLResourceOptions#>)
encoder.setTexture(param.outputTexture, index: 1)
var inMetalParam = param.metalParam var newBiaseBuffer: MTLBuffer
encoder.setBytes(&inMetalParam, length: MemoryLayout<MetalConvParam>.size, index: 0) var newScaleBuffer: MTLBuffer
encoder.setBuffer(param.filterBuffer, offset: 0, index: 1)
encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2) if computePrecision == .Float32 {
encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3) newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4) newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) } else if computePrecision == .Float16 {
encoder.endEncoding()
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
float32ToFloat16(input: newBiase as! UnsafeMutablePointer<Float32>, output: newBiaseBuffer.contents(), count: param.bias.buffer.length / MemoryLayout<P>.size)
float32ToFloat16(input: newScale as! UnsafeMutablePointer<Float32>, output: newScaleBuffer.contents(), count: param.scale.buffer.length / MemoryLayout<P>.size)
} else {
fatalError(" unsupport ")
} }
param.newBiase = newBiaseBuffer
param.newScale = newScaleBuffer
newScale.deinitialize(count: param.scale.buffer.length)
newScale.deallocate()
newBiase.deinitialize(count: param.bias.buffer.length)
newBiase.deallocate()
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.setBuffer(param.newScale!, offset: 0, index: 3)
encoder.setBuffer(param.newBiase!, offset: 0, index: 4)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
fatalError()
}
encoder.setTexture(param.inputTexture, index: 0)
encoder.setTexture(param.outputTexture, index: 1)
var inMetalParam = param.metalParam
encoder.setBytes(&inMetalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filterBuffer, offset: 0, index: 1)
encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2)
encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3)
encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4)
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
encoder.endEncoding()
}
} }
...@@ -15,33 +15,73 @@ ...@@ -15,33 +15,73 @@
import Foundation import Foundation
class ConvAddKernel<P: PrecisionType>: Kernel, Computable { class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddParam<P>) { required init(device: MTLDevice, param: ConvAddParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x1_half")
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half")
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_3x3_half")
} else if param.filter.width == 1 && param.filter.height == 5 {
super.init(device: device, inFunctionName: "conv_add_5x1_half")
} else if param.filter.width == 5 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x5_half")
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x1") super.init(device: device, inFunctionName: "conv_add_1x1")
let offsetX = param.filter.width/2 - Int(param.paddings[0]) } else if param.filter.channel == 1 {
let offsetY = param.filter.height/2 - Int(param.paddings[1]) super.init(device: device, inFunctionName: "depthwise_conv_add_3x3")
} else if param.filter.width == 1 && param.filter.height == 5 {
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) super.init(device: device, inFunctionName: "conv_add_5x1")
param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) } else if param.filter.width == 5 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x5")
print("offset x: \(offsetX)") } else if param.filter.width == 3 && param.filter.height == 3 {
print("offset y: \(offsetY)") super.init(device: device, inFunctionName: "conv_add_3x3")
} else {
let offsetZ = 0.0 fatalError(" unsupport yet ")
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) }
} else {
fatalError()
} }
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
}
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) // print(" function: \(functionName)")
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0) // print("offset x: \(offsetX)")
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) // print("offset y: \(offsetY)")
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) let offsetZ = 0.0
encoder.endEncoding() let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
// print("metal param: ")
// print(inMetalParam)
metalParam = inMetalParam
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half")
}
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float")
}
} else {
fatalError(" unsupport yet ")
}
} else {
fatalError()
}
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
// print(" function: \(functionName)")
// print("offset x: \(offsetX)")
// print("offset y: \(offsetY)")
let offsetZ = 0.0
let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
// print("metal param: ")
// print(inMetalParam)
metalParam = inMetalParam
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddPreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import MetalPerformanceShaders
struct ConvBNReluTestParam: TestParam {
let inputTexture: MTLTexture
let outputTexture: MTLTexture
var metalParam: MetalConvParam
let filterBuffer: MTLBuffer
let biaseBuffer: MTLBuffer
let newScaleBuffer: MTLBuffer
let newBiaseBuffer: MTLBuffer
let filterSize: (width: Int, height: Int, channel: Int)
init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
inputTexture = inInputTexture
outputTexture = inOutputTexture
metalParam = inMetalParam
filterBuffer = inFilterBuffer
biaseBuffer = inBiaseBuffer
newScaleBuffer = inNewScaleBuffer
newBiaseBuffer = inNewBiaseBuffer
filterSize = inFilterSize
}
}
class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
required init(device: MTLDevice, testParam: ConvBNReluTestParam) {
if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1")
} else if testParam.filterSize.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3")
} else {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3")
}
}
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvBNReluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.variance.initBuffer(device: device, precision: .Float32)
param.mean.initBuffer(device: device, precision: .Float32)
param.scale.initBuffer(device: device, precision: .Float32)
param.bias.initBuffer(device: device, precision: .Float32)
if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1")
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3")
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3")
} else {
fatalError(" unsupport ")
}
} else if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half")
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half")
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half")
} else {
fatalError(" unsupport ")
}
} else {
fatalError()
}
let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
// print(" param filter width: \(param.filter.width)")
// print(" param filter height: \(param.filter.height)")
//
// print(" param paddings: \(param.paddings)")
//
// print("ConvBNReluKernel offset x: \(offsetX)")
// print("ConvBNReluKernel offset y: \(offsetY)")
let offsetZ = 0.0
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
var invs: [P] = []
let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<param.variance.buffer.length/MemoryLayout<P>.stride {
let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
invs.append(P(inv))
}
let newScale: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: param.scale.buffer.length)
let newBiase: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: param.bias.buffer.length)
let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self)
let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self)
let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<param.scale.buffer.length/MemoryLayout<P>.stride {
newScale[i] = invs[i] * scaleContents[i]
newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i]
}
var newBiaseBuffer: MTLBuffer
var newScaleBuffer: MTLBuffer
if computePrecision == .Float32 {
newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
} else if computePrecision == .Float16 {
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
float32ToFloat16(input: newBiase as! UnsafeMutablePointer<Float32>, output: newBiaseBuffer.contents(), count: param.bias.buffer.length / MemoryLayout<P>.size)
float32ToFloat16(input: newScale as! UnsafeMutablePointer<Float32>, output: newScaleBuffer.contents(), count: param.scale.buffer.length / MemoryLayout<P>.size)
} else {
fatalError(" unsupport ")
}
param.newBiase = newBiaseBuffer
param.newScale = newScaleBuffer
newScale.deinitialize(count: param.scale.buffer.length)
newScale.deallocate()
newBiase.deinitialize(count: param.bias.buffer.length)
newBiase.deallocate()
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvBNReluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.newScale!, offset: 0, index: 2)
encoder.setBuffer(param.newBiase!, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
public func test(commandBuffer: MTLCommandBuffer, param: ConvBNReluTestParam) {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
fatalError()
}
encoder.setTexture(param.inputTexture, index: 0)
encoder.setTexture(param.outputTexture, index: 1)
var inMetalParam = param.metalParam
encoder.setBytes(&inMetalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filterBuffer, offset: 0, index: 1)
encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 2)
encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct MetalConvParam {
short offsetX;
short offsetY;
short offsetZ;
ushort strideX;
ushort strideY;
};
kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = half4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = half4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = half4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
half4 output = half4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
outTexture.write(output, gid.xy, gid.z);
}
/*---------------------------------------------*/
kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
...@@ -14,38 +14,49 @@ ...@@ -14,38 +14,49 @@
import Foundation import Foundation
public struct MetalConvParam { public struct MetalConvParam {
let offsetX: Int16 let offsetX: Int16
let offsetY: Int16 let offsetY: Int16
let offsetZ: Int16 let offsetZ: Int16
let strideX: UInt16 let strideX: UInt16
let strideY: UInt16 let strideY: UInt16
let paddedZ: UInt16 let dilationX: UInt16
let dilationY: UInt16
} }
class ConvKernel<P: PrecisionType>: Kernel, Computable { class ConvKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvParam<P>) { required init(device: MTLDevice, param: ConvParam<P>) {
super.init(device: device, inFunctionName: "conv_add_1x1") param.filter.initBuffer(device: device, precision: ComputePrecision.Float32)
let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) if param.filter.width == 1 && param.filter.height == 1 {
let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) super.init(device: device, inFunctionName: "conv_1x1")
let offsetZ = 0.0 } else if param.filter.channel == 1 {
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) super.init(device: device, inFunctionName: "depthwise_conv_3x3")
} else if param.filter.width == 3 && param.filter.height == 3 {
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) super.init(device: device, inFunctionName: "conv_3x3")
} else {
fatalError(" unsupport ")
} }
let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0])
let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1])
let offsetZ = 0.0
func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws { metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { }
throw PaddleMobileError.predictError(message: " encode is nil")
} func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
encoder.setTexture(param.input.metalTexture, index: 0) throw PaddleMobileError.predictError(message: " encode is nil")
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct MetalConvTransposeParam {
let kernelW: UInt16;
let kernelH: UInt16;
let strideX: UInt16;
let strideY: UInt16;
let paddingX: UInt16;
let paddingY: UInt16;
let dilationX: UInt16;
let dilationY: UInt16;
}
class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: MetalConvTransposeParam!
required init(device: MTLDevice, param: ConvTransposeParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision, convertToNHWC: false, withTranspose: true)
if computePrecision == .Float32 {
if param.stride == [2, 2] && param.stride == [2, 2] {
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2")
} else {
fatalError(" -- conv transpose unsupported yet -- ")
}
} else if computePrecision == .Float16 {
if param.stride == [2, 2] && param.stride == [2, 2] {
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2_half")
} else {
fatalError(" -- conv transpose unsupported yet -- ")
}
} else {
fatalError()
}
// let filter: [Float32] = param.filter.buffer.array()
// print(" conv transpose filter")
// print(filter)
let kernelWidth = UInt16(param.filter.width)
let kernelHeight = UInt16(param.filter.height)
let strideX = UInt16(param.stride[0])
let strideY = UInt16(param.stride[1])
let paddingX = UInt16(param.paddings[0])
let paddingY = UInt16(param.paddings[1])
let dilationX = UInt16(param.dilations[0])
let dilationY = UInt16(param.dilations[1])
metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY)
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvTransposeParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
...@@ -14,13 +14,60 @@ ...@@ -14,13 +14,60 @@
import Foundation import Foundation
struct ElementwiseAddMetalParam {
var fast: Int32 = 0
var axis: Int32 = 0
var ylen: Int32 = 0
var xdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0)
var xtrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3)
var ydim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0)
var ytrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3)
}
class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable { class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
required init(device: MTLDevice, param: ElementwiseAddParam<P>) { var metalParam: ElementwiseAddMetalParam
super.init(device: device, inFunctionName: "elementwise_add") required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
} param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
metalParam = ElementwiseAddMetalParam.init()
let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam<P>) throws { metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
if param.axis == -1 {
metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout())
} else {
metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
}
metalParam.ylen = Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
// print("===> elementwise_add fast!!!")
metalParam.fast = 1
}
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "elementwise_add")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "elementwise_add_half")
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.inputX.metalTexture, index: 0)
encoder.setTexture(param.inputY.metalTexture, index: 1)
encoder.setTexture(param.output.metalTexture, index: 2)
encoder.setBytes(&metalParam, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
metalParam = ElementwiseAddMetalParam.init()
let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
if param.axis == -1 {
metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout())
} else {
metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
}
metalParam.ylen = Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
// print("===> elementwise_add fast!!!")
metalParam.fast = 1
}
if computePrecision == .Float32 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_element_float")
} else {
super.init(device: device, inFunctionName: "elementwise_add_prelu_float")
}
} else if computePrecision == .Float16 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
} else {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
}
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddPreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.inputX.metalTexture, index: 0)
encoder.setTexture(param.inputY.metalTexture, index: 1)
encoder.setTexture(param.output.metalTexture, index: 2)
encoder.setBytes(&metalParam, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct FlattenMetalParam {
var idim: (Int32, Int32, Int32, Int32)
var itrans: (Int32, Int32, Int32, Int32)
var odim: (Int32, Int32, Int32, Int32)
var otrans: (Int32, Int32, Int32, Int32)
}
class FlattenKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: FlattenMetalParam
required init(device: MTLDevice, param: FlattenParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
}
let it: [Int32] = param.input.transpose.map { Int32($0) }
var od: [Int32] = [1, 1, 1, 1]
for i in 0..<param.output.tensorDim.cout() {
od[4-param.output.tensorDim.cout()+i] = Int32(param.output.tensorDim[i])
}
let ot: [Int32] = param.output.transpose.map { Int32($0) }
metalParam = FlattenMetalParam.init(
idim: (id[0], id[1], id[2], id[3]),
itrans: (it[0], it[1], it[2], it[3]),
odim: (od[0], od[1], od[2], od[3]),
otrans: (ot[0], ot[1], ot[2], ot[3])
)
let irank = param.input.tensorDim.cout()
let orank = param.output.tensorDim.cout()
assert(orank == 2)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_half")
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: FlattenParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import Foundation
public protocol TestParam {
}
public protocol Testable {
associatedtype TestParamType: TestParam
func test(commandBuffer: MTLCommandBuffer, param: TestParamType)
init(device: MTLDevice, testParam: TestParamType)
}
protocol Computable {
associatedtype ParamType: OpParam
func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws
init(device: MTLDevice, param: ParamType)
}
protocol KernelProtocol {
var pipline: MTLComputePipelineState { get set }
var functionName: String { get set }
}
open class Kernel {
let pipline: MTLComputePipelineState
let functionName: String
public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) {
pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib)
functionName = inFunctionName
}
}
open class CusomKernel: Kernel {
public struct Shape {
public let width: Int
public let height: Int
public let channel: Int
public init(inWidth: Int, inHeight: Int, inChannel: Int){
width = inWidth
height = inHeight
channel = inChannel
}
}
let outputTexture: MTLTexture
public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) {
let textureDesc = MTLTextureDescriptor.init()
textureDesc.textureType = .type2D
textureDesc.width = outputDim.width
textureDesc.height = outputDim.height
textureDesc.depth = (outputDim.channel + 3) / 4
textureDesc.pixelFormat = .rgba32Float
textureDesc.usage = [.shaderRead, .shaderWrite]
textureDesc.storageMode = .shared
outputTexture = device.makeTexture(descriptor: textureDesc) ?! " make texture error "
super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib)
}
func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(inputTexuture, index: 0)
encoder.setTexture(outputTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: outputTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct OutputDim {
ushort width;
ushort height;
ushort strideX;
ushort strideY;
};
kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant OutputDim &params [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint2 pos = gid.xy * uint2(params.strideX, params.strideY);
const half4 input = inTexture.read(pos);
outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z);
}
kernel void relu(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const half4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(half4(relu), gid.xy, gid.z);
}
kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *biasTerms [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const half4 input = inTexture.read(gid.xy, gid.z);
outTexture.write(input, gid.xy, gid.z);
}
kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 * newScale [[buffer(0)]],
const device half4 * newBias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const half4 input = inTexture.read(gid.xy, gid.z);
half4 output = input * newScale[gid.z] + newBias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
// texture2d_array<half, access::write> outTexture [[texture(1)]],
// uint3 gid [[thread_position_in_grid]]) {
// if (gid.x >= inTexture.get_width() ||
// gid.y >= inTexture.get_height()){
// return;
// }
// const half4 input = inTexture.read(gid.xy);
// outTexture.write(input, gid.xy, 0);
//}
kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const float4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const half4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
struct PoolParam {
int ksizeX;
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
float4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
half4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
float4 r = inTexture.read(uint2(0, 0), gid.z);
outTexture.write(r, gid.xy, gid.z);
}
kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
half4 r = inTexture.read(uint2(0, 0), gid.z);
outTexture.write(r, gid.xy, gid.z);
}
kernel void softmax(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int zsize = inTexture.get_array_size();
float maxv = inTexture.read(uint2(0, 0), 0)[0];
for (int z = 0; z < zsize; z++) {
float4 r = inTexture.read(uint2(0, 0), z);
maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3])));
}
float sum = 0;
for (int z = 0; z < zsize; z++) {
float4 r = inTexture.read(uint2(0, 0), z);
sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv);
}
float4 rr = inTexture.read(gid.xy, gid.z);
rr = exp(rr - maxv) / sum;
outTexture.write(rr, gid.xy, gid.z);
}
kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int zsize = inTexture.get_array_size();
half maxv = inTexture.read(uint2(0, 0), 0)[0];
for (int z = 0; z < zsize; z++) {
half4 r = inTexture.read(uint2(0, 0), z);
maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3])));
}
float sum = 0;
for (int z = 0; z < zsize; z++) {
half4 r = inTexture.read(uint2(0, 0), z);
sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv);
}
half4 rr = inTexture.read(gid.xy, gid.z);
rr = exp(rr - maxv) / sum;
outTexture.write(rr, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class MulticlassNMSKernel<P: PrecisionType>: Kernel, Computable{
let pipline1: MTLComputePipelineState
required init(device: MTLDevice, param: MulticlassNMSParam<P>) {
param.middleOutput.initBuffer(device: device)
param.bboxOutput.initBuffer(device: device)
if computePrecision == .Float32 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", inPaddleMobileLib: true)
super.init(device: device, inFunctionName: "nms_fetch_result")
} else if computePrecision == .Float16 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", inPaddleMobileLib: true)
super.init(device: device, inFunctionName: "nms_fetch_result_half")
} else {
fatalError( " unsupport precision " )
}
}
func compute(commandBuffer: MTLCommandBuffer, param: MulticlassNMSParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.scores.metalTexture, index: 0)
encoder.setBuffer(param.middleOutput.resultBuffer!, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.scores.metalTexture)
encoder.endEncoding()
guard let encoderBox = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoderBox.setTexture(param.bboxes.metalTexture, index: 0)
encoderBox.setBuffer(param.bboxOutput.resultBuffer!, offset: 0, index: 0)
encoderBox.dispatch(computePipline: pipline1, outTexture: param.bboxes.metalTexture)
encoderBox.endEncoding()
}
}
...@@ -15,46 +15,57 @@ ...@@ -15,46 +15,57 @@
import Foundation import Foundation
struct PoolMetalParam { struct PoolMetalParam {
let ksizeX: Int32 let ksizeX: Int32
let ksizeY: Int32 let ksizeY: Int32
let strideX: Int32 let strideX: Int32
let strideY: Int32 let strideY: Int32
let paddingX: Int32 let paddingX: Int32
let paddingY: Int32 let paddingY: Int32
let poolType: Int32 let poolType: Int32
} }
class PoolKernel<P: PrecisionType>: Kernel, Computable{ class PoolKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws { var metalParam: PoolMetalParam
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { required init(device: MTLDevice, param: PoolParam<P>) {
throw PaddleMobileError.predictError(message: " encoder is nil") param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
}
encoder.setTexture(param.input.metalTexture, index: 0) var poolType: Int32
encoder.setTexture(param.output.metalTexture, index: 1) switch param.poolType {
var poolType: Int32 case "max":
switch param.poolType { poolType = 0
case "max": case "avg":
poolType = 0 poolType = 1
case "avg": default:
poolType = 1 fatalError()
default:
throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType)
}
var pmp = PoolMetalParam.init(
ksizeX: param.ksize[0],
ksizeY: param.ksize[1],
strideX: param.stride[0],
strideY: param.stride[1],
paddingX: param.padding[0],
paddingY: param.padding[1],
poolType: poolType
)
encoder.setBytes(&pmp, length: MemoryLayout<PoolMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
} }
metalParam = PoolMetalParam.init(
ksizeX: param.ksize[0],
ksizeY: param.ksize[1],
strideX: param.stride[0],
strideY: param.stride[1],
paddingX: param.padding[0],
paddingY: param.padding[1],
poolType: poolType
)
required init(device: MTLDevice, param: PoolParam<P>) { if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "pool") super.init(device: device, inFunctionName: "pool")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "pool_half")
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<PoolMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class PreluKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: PreluParam<P>) {
param.alpha.initBuffer(device: device, precision: computePrecision)
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "prelu_channel")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "prelu_element")
} else {
super.init(device: device, inFunctionName: "prelu_other")
}
} else if computePrecision == .Float16 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "prelu_element_half")
} else {
super.init(device: device, inFunctionName: "prelu_other_half")
}
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: PreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct PriorBoxMetalParam {
let offset: Float32
let stepWidth: Float32
let stepHeight: Float32
let minSize: Float32
let maxSize: Float32
let imageWidth: Float32
let imageHeight: Float32
let clip: Bool
let numPriors: uint
let aspecRatiosSize: uint
let minSizeSize: uint
let maxSizeSize: uint
}
class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PriorBoxMetalParam!
required init(device: MTLDevice, param: PriorBoxParam<P>) {
let originDim = param.output.tensorDim;
param.output.tensorDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]])
param.output.padToFourDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]])
param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: computePrecision)
param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision)
if computePrecision == .Float32 {
if param.min_max_aspect_ratios_order {
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder")
} else {
super.init(device: device, inFunctionName: "prior_box")
}
} else if computePrecision == .Float16 {
if param.min_max_aspect_ratios_order {
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half")
} else {
super.init(device: device, inFunctionName: "prior_box_half")
}
} else {
fatalError()
}
guard param.minSizes.count == 1 else {
fatalError(" need implement ")
}
// let n = 1
// let h = param.output.dim[1]
// let w = param.output.dim[2]
// let c = param.output.dim[3] * param.output.dim[0]
//
// param.output.dim = Dim.init(inDim: [n, h, w, c])
// param.output.transpose = [0, 1, 2, 3]
let imageWidth = Float32(param.inputImage.padToFourDim[3])
let imageHeight = Float32(param.inputImage.padToFourDim[2])
let featureWidth = param.input.padToFourDim[3]
let featureHeight = param.input.padToFourDim[2]
if param.stepW == 0 || param.stepH == 0 {
param.stepW = Float32(imageWidth) / Float32(featureWidth)
param.stepH = Float32(imageHeight) / Float32(featureHeight)
}
var outputAspectRatior: [Float32] = []
outputAspectRatior.append(1.0)
let epsilon = 1e-6
for ar in param.aspectRatios {
var alreadyExist = false
for outputAr in outputAspectRatior {
if fabs(Double(ar) - Double(outputAr)) < Double(epsilon) {
alreadyExist = true
break
}
}
if !alreadyExist {
outputAspectRatior.append(ar)
}
if param.flip {
outputAspectRatior.append(1.0 / ar)
}
}
if computePrecision == .Float16 {
let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout<Float16>.size)
float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count)
param.newAspectRatios = buffer
} else if computePrecision == .Float32 {
let buffer = device.makeBuffer(bytes: outputAspectRatior, length: outputAspectRatior.count * MemoryLayout<Float32>.size, options: [])
param.newAspectRatios = buffer
} else {
fatalError()
}
let aspectRatiosSize = uint(outputAspectRatior.count)
let maxSizeSize: uint = uint(param.maxSizes.count)
let minSizeSize: uint = uint(param.minSizes.count)
let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize
let minSize = param.minSizes.last ?? 0.0
let maxSize = param.maxSizes.last ?? 0.0
metalParam = PriorBoxMetalParam.init(offset: param.offset, stepWidth: param.stepW, stepHeight: param.stepH, minSize: minSize, maxSize: maxSize, imageWidth: imageWidth, imageHeight: imageHeight, clip: param.clip, numPriors: numPriors, aspecRatiosSize: aspectRatiosSize, minSizeSize: minSizeSize, maxSizeSize: maxSizeSize)
}
func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setTexture(param.outputVariances.metalTexture, index: 2)
encoder.setBuffer(param.newAspectRatios!, offset: 0, index: 0)
encoder.setBytes(&metalParam, length: MemoryLayout<PriorBoxMetalParam>.size, index: 1)
encoder.setBytes(param.variances, length: MemoryLayout<Float32>.size * param.variances.count, index: 2)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
...@@ -15,17 +15,23 @@ ...@@ -15,17 +15,23 @@
import Foundation import Foundation
class ReluKernel<P: PrecisionType>: Kernel, Computable{ class ReluKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: ReluParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: ReluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
} }
encoder.setTexture(param.input.metalTexture, index: 0)
required init(device: MTLDevice, param: ReluParam<P>) { encoder.setTexture(param.output.metalTexture, index: 1)
super.init(device: device, inFunctionName: "relu") encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: ReluParam<P>) {
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "relu")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "relu_half")
} else {
fatalError()
} }
}
} }
...@@ -14,18 +14,84 @@ ...@@ -14,18 +14,84 @@
import Foundation import Foundation
struct ReshapeMetalParam {
var idim: (Int32, Int32, Int32, Int32)
var itrans: (Int32, Int32, Int32, Int32)
var odim: (Int32, Int32, Int32, Int32)
var otrans: (Int32, Int32, Int32, Int32)
}
struct ReshapeTestParam: TestParam {
let inputTexture: MTLTexture
let outputTexture: MTLTexture
let param: ReshapeMetalParam
}
class ReshapeKernel<P: PrecisionType>: Kernel, Computable{ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: ReshapeParam<P>) {
super.init(device: device, inFunctionName: "reshape") var metalParam: ReshapeMetalParam
required init(device: MTLDevice, param: ReshapeParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
} }
let it: [Int32] = param.input.transpose.map { Int32($0) }
func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws { var od: [Int32] = [1, 1, 1, 1]
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { for i in 0..<param.output.tensorDim.cout() {
throw PaddleMobileError.predictError(message: " encoder is nil") od[4-param.output.tensorDim.cout()+i] = Int32(param.output.tensorDim[i])
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
} }
let ot: [Int32] = param.output.transpose.map { Int32($0) }
metalParam = ReshapeMetalParam.init(
idim: (id[0], id[1], id[2], id[3]),
itrans: (it[0], it[1], it[2], it[3]),
odim: (od[0], od[1], od[2], od[3]),
otrans: (ot[0], ot[1], ot[2], ot[3])
)
let irank = param.input.tensorDim.cout()
let orank = param.output.tensorDim.cout()
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_half")
} else {
fatalError()
}
}
required init(device: MTLDevice, testParam: ReshapeTestParam) {
metalParam = ReshapeMetalParam.init(
idim: (0, 0, 0, 0),
itrans: (0, 0, 0, 0),
odim: (0, 0, 0, 0),
otrans: (0, 0, 0, 0)
)
super.init(device: device, inFunctionName: "reshape")
}
func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
// func test(commandBuffer: MTLCommandBuffer, testParam: ReshapeTestParam) {
// guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
// fatalError()
// }
// encoder.setTexture(testParam.inputTexture, index: 0)
// encoder.setTexture(testParam.outputTexture, index: 1)
// var pm: ReshapeMetalParam = testParam.param
// encoder.setBytes(&pm, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
// encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture)
// encoder.endEncoding()
// }
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct ResizeBilinearMetalParam {
var ratio_h: Float32
var ratio_w: Float32
}
class ResizeBilinearKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: ResizeBilinearParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
let ratio_h: Float32 = Float32(param.input.tensorDim.dims[2]) / Float32(param.output.tensorDim.dims[2])
let ratio_w: Float32 = Float32(param.input.tensorDim.dims[3]) / Float32(param.output.tensorDim.dims[3])
var p = ResizeBilinearMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w)
encoder.setBytes(&p, length: MemoryLayout<ConcatMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: ResizeBilinearParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "resize_bilinear")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "resize_bilinear_half")
} else {
fatalError()
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
//
//import Foundation
//import MetalPerformanceShaders
//
//
//struct ResizeParam: OpParam{
// typealias OutputType = <#type#>
//
// typealias ParamPrecisionType = <#type#>
//
// let input: MTLTexture
// let output: MTLTexture
// let expectDim: Dim
//}
//
//struct OutputDim {
// let width: UInt16
// let height: UInt16
// let strideX: UInt16
// let strideY: UInt16
//}
//
//class ResizeKernel<P: PrecisionType>: Kernel, Computable{
// var lanczos: MPSImageLanczosScale
// required init(device: MTLDevice, param: ResizeParam) {
// lanczos = MPSImageLanczosScale.init(device: device)
// super.init(device: device, inFunctionName: "resize")
// }
// func compute(commandBuffer: MTLCommandBuffer, param: ResizeParam) throws {
//// guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
//// throw PaddleMobileError.predictError(message: " encode is nil")
//// }
// lanczos.encode(commandBuffer: commandBuffer, sourceTexture: param.input, destinationTexture: param.output)
//
//// encoder.setTexture(param.input, index: 0)
//// encoder.setTexture(param.output, index: 1)
//// let strideX = param.input.width/param.expectDim[2]
//// let strideY = param.input.height/param.expectDim[1]
//// var outputDim = OutputDim.init(width: UInt16(param.expectDim[1]), height: UInt16(param.expectDim[2]), strideX: UInt16(strideX), strideY: UInt16(strideY))
//// encoder.setBytes(&outputDim, length: MemoryLayout<OutputDim>.size, index: 0)
//// encoder.dispatch(computePipline: pipline, outTexture: param.output)
//// encoder.endEncoding()
// }
//
//
//
//
//}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct ShapeMetalParam {
}
class ShapeKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: ShapeParam<P>) throws {
// print("shape compute")
// guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
// throw PaddleMobileError.predictError(message: " encode is nil")
// }
// encoder.setTexture(param.output.metalTexture, index: 0)
// encoder.endEncoding()
}
required init(device: MTLDevice, param: ShapeParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "shape")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "shape_half")
} else {
fatalError()
}
}
}
...@@ -14,19 +14,38 @@ ...@@ -14,19 +14,38 @@
import Foundation import Foundation
struct SoftmaxMetalParam {
let N: Int32
let K: Int32
}
class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{ class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam<P>) throws { var metalParam: SoftmaxMetalParam
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { required init(device: MTLDevice, param: SoftmaxParam<P>) {
throw PaddleMobileError.predictError(message: " encoder is nil") param.output.initTexture(device: device, computePrecision: computePrecision)
} metalParam = SoftmaxMetalParam.init(
encoder.setTexture(param.input.metalTexture, index: 0) N: Int32(param.input.tensorDim[0]),
encoder.setTexture(param.output.metalTexture, index: 1) K: Int32(param.input.tensorDim[1])
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) )
encoder.endEncoding() if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "softmax_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "softmax_half")
} else {
fatalError()
} }
}
required init(device: MTLDevice, param: SoftmaxParam<P>) {
super.init(device: device, inFunctionName: "softmax") func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<SoftmaxMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct SplitMetalParam {
var idim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1)
var axis: Int32 = 0
var offset: Int32 = 0
var trans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3)
var vdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0)
}
class SplitKernel<P: PrecisionType>: Kernel, Computable{
var smp: SplitMetalParam
func compute(commandBuffer: MTLCommandBuffer, param: SplitParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
for i in 0..<param.outputList.count {
encoder.setTexture(param.outputList[i].metalTexture, index: i + 1)
}
encoder.setBytes(&smp, length: MemoryLayout<SplitMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: SplitParam<P>) {
// param.output.initTexture(device: device, computePrecision: computePrecision)
let num = param.outputList.count
let rank = param.input.tensorDim.cout()
assert(num >= 2 && num <= 4)
for output in param.outputList {
output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
}
smp = SplitMetalParam.init()
smp.idim = (Int32(param.input.dim[0]), Int32(param.input.dim[1]), Int32(param.input.dim[2]), Int32(param.input.dim[3]))
smp.axis = Int32(param.axis + param.input.dim.cout() - param.input.tensorDim.cout())
for i in 0..<4 {
if param.input.transpose[i] == smp.axis {
smp.axis = Int32(i)
break
}
}
smp.trans = (Int32(param.input.transpose[0]), Int32(param.input.transpose[1]), Int32(param.input.transpose[2]), Int32(param.input.transpose[3]))
var vdim: [Int32] = [0, 0, 0, 0]
for i in 0..<num {
vdim[i] = Int32(param.outputList[i].tensorDim[param.axis])
}
smp.vdim = (vdim[0], vdim[1], vdim[2], vdim[3])
var v = "normal"
if rank == 4 {
if smp.axis == 1 {
v = "y"
} else if smp.axis == 2 {
v = "x"
}
} else if rank == 3 {
if smp.axis == 2 {
v = "y"
} else if smp.axis == 3 {
v = "x"
}
} else if rank == 2 {
if smp.axis == 2 {
v = "y"
}
}
if v == "normal" {
fatalError("split unsupported")
}
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_half")
} else {
fatalError()
}
}
}
...@@ -15,23 +15,31 @@ ...@@ -15,23 +15,31 @@
import Foundation import Foundation
struct Texture2DTo2DArrayParam { struct Texture2DTo2DArrayParam {
let input: MTLTexture let input: MTLTexture
let output: MTLTexture let output: MTLTexture
let expectDim: Dim let expectDim: Dim
} }
class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: FeedParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: FeedParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.mtlTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture)
encoder.endEncoding()
} }
encoder.setTexture(param.input.mtlTexture, index: 0)
required init(device: MTLDevice, param: FeedParam<P>) { encoder.setTexture(param.output.metalTexture, index: 1)
super.init(device: device, inFunctionName: "texture2d_to_2d_array") encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: FeedParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array_half")
} else if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
} else {
fatalError()
} }
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct TransposeMetalParam {
var iC: Int32 = 0
var oC: Int32 = 0
var axis: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3)
}
class TransposeKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: TransposeMetalParam = TransposeMetalParam.init()
required init(device: MTLDevice, param: TransposeParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
let rank = param.input.tensorDim.cout()
var axis: [Int] = [0, 1, 2, 3]
for i in 0..<param.axis.count {
axis[4-rank+i] = 4 - rank + Int(param.axis[i])
}
var naxis: [Int] = [0, 0, 0, 0]
for i in 0..<4 {
for j in 0..<4 {
if param.input.transpose[j] == axis[i] {
naxis[i] = j
break
}
}
}
metalParam.iC = Int32(param.input.dim[param.input.transpose[3]])
metalParam.oC = Int32(param.output.dim[3])
metalParam.axis = (Int32(naxis[0]), Int32(naxis[1]), Int32(naxis[2]), Int32(naxis[3]))
var kernelFunc = "transpose_undefined"
if computePrecision == .Float16 {
if param.input.transpose == axis {
kernelFunc = "transpose_copy_half"
} else {
kernelFunc = "transpose_\(rank)_half"
}
} else if computePrecision == .Float32 {
if param.input.transpose == axis {
kernelFunc = "transpose_copy_float"
} else {
kernelFunc = "transpose_\(rank)_float"
}
} else {
fatalError()
}
print("===========>", kernelFunc)
print(metalParam)
super.init(device: device, inFunctionName: kernelFunc)
}
func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 * nscale [[buffer(0)]],
const device float4 * nbias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * nscale[gid.z] + nbias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 * newScale [[buffer(0)]],
const device half4 * newBias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const half4 input = inTexture.read(gid.xy, gid.z);
half4 output = input * newScale[gid.z] + newBias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
//
// BatchNormRelu.metal
// paddle-mobile
//
#include <metal_stdlib>
using namespace metal;
struct MetalConvParam {
short offsetX;
short offsetY;
short offsetZ;
ushort strideX;
ushort strideY;
};
kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *new_scale [[buffer(0)]],
const device float4 *new_biase [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
float4 input;
float4 output;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
input = inTexture.sample(sample, gid.x, gid.y, gid.z);
output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> output [[texture(1)]],
constant bilinear_interp_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
P w = gid.x * pm.ratio_w;
P h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
P w1lambda = w - w0, h1lambda = h - h0;
P w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
VECTOR(P, 4) r0 = input.read(uint2(w0, h0), gid.z);
VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1)
+ h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct bilinear_interp_param {
float ratio_h;
float ratio_w;
};
#define P float
#include "BilinearInterp.inc.metal"
#undef P
#define P half
#include "BilinearInterp.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(boxcoder, P)(texture2d_array<P, access::read> priorBox [[texture(0)]],
texture2d_array<P, access::read> priorBoxVar [[texture(1)]],
texture2d_array<P, access::read> targetBox [[texture(2)]],
texture2d_array<P, access::write> output[[texture(3)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) p = priorBox.read(uint2(0, gid.x), gid.z);
VECTOR(P, 4) pv = priorBoxVar.read(uint2(0, gid.x), gid.z);
VECTOR(P, 4) t;
t[0] = targetBox.read(uint2(0, gid.x), gid.z)[0];
t[1] = targetBox.read(uint2(1, gid.x), gid.z)[0];
t[2] = targetBox.read(uint2(2, gid.x), gid.z)[0];
t[3] = targetBox.read(uint2(3, gid.x), gid.z)[0];
P px = (p.x + p.z) / 2;
P py = (p.y + p.w) / 2;
P pw = p.z - p.x;
P ph = p.w - p.y;
P tx = pv.x * t.x * pw + px;
P ty = pv.y * t.y * ph + py;
P tw = exp(pv.z * t.z) * pw;
P th = exp(pv.w * t.w) * ph;
VECTOR(P, 4) r;
r.x = tx - tw / 2;
r.y = ty - th / 2;
r.z = tx + tw / 2;
r.w = ty + th / 2;
output.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define P float
#include "BoxCoder.inc.metal"
#undef P
#define P half
#include "BoxCoder.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = abcd[2] = 0;
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = 0;
abcd[2] = xyzn[1];
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
abcd[0] = 0;
abcd[3] = xyzn[0];
abcd[2] = xyzn[1];
abcd[1] = xyzn[2] * 4 + xyzn[3];
}
inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
uint t = xyzn[2] * 4 + xyzn[3];
abcd[0] = t / C;
abcd[3] = t % C;
}
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
xyzn[1] = xyzn[2] = 0;
xyzn[0] = abcd[3] / 4;
xyzn[1] = abcd[3] % 4;
}
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
xyzn[2] = 0;
xyzn[1] = abcd[2];
xyzn[0] = abcd[3] / 4;
xyzn[3] = abcd[3] % 4;
}
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[3];
xyzn[1] = abcd[2];
xyzn[2] = abcd[1] / 4;
xyzn[3] = abcd[1] % 4;
}
inline void abcd2xyzn_4(int C, int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[2];
xyzn[1] = abcd[1];
uint t = abcd[0] * C + abcd[3];
xyzn[2] = t / 4;
xyzn[3] = t % 4;
}
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
uint t = xyzn[2] * 4 + xyzn[3];
abcd[0] = t / C;
abcd[3] = t % C;
}
inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[2];
xyzn[1] = abcd[1];
uint t = abcd[0] * C + abcd[3];
xyzn[2] = t / 4;
xyzn[3] = t % 4;
}
inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
int32_t r = abcd[0];
r = r * dim[1] + abcd[1];
r = r * dim[2] + abcd[2];
r = r * dim[3] + abcd[3];
return r;
}
inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
abcd[3] = ind % dim[3]; ind /= dim[3];
abcd[2] = ind % dim[2]; ind /= dim[2];
abcd[1] = ind % dim[1]; ind /= dim[1];
abcd[0] = ind;
}
inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[i] = ipos[trans[i]];
}
}
inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[trans[i]] = ipos[i];
}
}
struct MetalConvParam {
short offsetX;
short offsetY;
short offsetZ;
ushort strideX;
ushort strideY;
ushort dilationX;
ushort dilationY;
};
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
#if V == VX
#define VV x
#elif V == VY
#define VV y
#elif V == VZ
#define VV z
#else
#define VV normal
#endif
#if V == VNORMAL
//kernel void FUNC(concat, R, N, normal, P)(array<texture2d_array<P, access::read>, N> in [[texture(0)]],
// texture2d_array<P, access::read> out_x [[texture(N)]],
// texture2d_array<P, access::write> out [[texture(N+1)]],
// constant ConcatParam & pm [[buffer(0)]],
// uint3 gid [[thread_position_in_grid]]) {
//}
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif
texture2d_array<P, access::read> inx [[texture(N)]],
texture2d_array<P, access::write> out [[texture(N+1)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
ConcatParam cp = pm;
int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4];
VECTOR(P, 4) r = inx.read(gid.xy, gid.z);
for (int i = 0; i < 4; i++) {
xyzn[3] = i;
#if R == 4
xyzn2abcd_4(cp.odim[3], xyzn, abcd);
#else
FUNC_R(xyzn2abcd, R)(xyzn, abcd);
#endif
int k = abcd[cp.axis] - cp.offset;
if (k < 0) continue;
int j = 0;
for (; j < N; j++) {
if (k < cp.vdim[j]) {
break;
}
k -= cp.vdim[j];
}
if (j == N) {
continue;
}
int ta = cp.odim[cp.axis];
abcd[cp.axis] = k;
cp.odim[cp.axis] = cp.vdim[j];
#if R == 4
abcd2xyzn_4(cp.odim[3], abcd, oxyzn);
#else
FUNC_R(abcd2xyzn, R)(abcd, oxyzn);
#endif
cp.odim[cp.axis] = ta;
switch (j) {
case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#if N >= 3
case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 4
case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 5
case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 6
case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
}
}
out.write(r, gid.xy, gid.z);
}
#endif // V == NORMAL
#if V == VX
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int x = gid.x - pm.offset;
if (x < 0) return;
if (x < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
x -= pm.vdim[0];
if (x < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
x -= pm.vdim[1];
if (x < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
x -= pm.vdim[2];
if (x < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
x -= pm.vdim[3];
if (x < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
x -= pm.vdim[4];
if (x < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VX
#if V == VY
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int y = gid.y - pm.offset;
if (y < 0) return;
if (y < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[0];
if (y < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
y -= pm.vdim[1];
if (y < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
y -= pm.vdim[2];
if (y < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
y -= pm.vdim[3];
if (y < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
y -= pm.vdim[4];
if (y < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VY
#if V == VZ
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int z = gid.z - pm.offset;
if (z < 0) return;
if (z < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[0];
if (z < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
z -= pm.vdim[1];
if (z < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
z -= pm.vdim[2];
if (z < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
z -= pm.vdim[3];
if (z < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
z -= pm.vdim[4];
if (z < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VZ
#undef VV
#endif // #ifdef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ConcatParam {
int32_t odim[4];
int32_t axis;
int32_t offset;
int32_t trans[4];
int32_t vdim[6];
};
#define VNORMAL 1
#define VX 2
#define VY 3
#define VZ 4
// >> fast mode
// only support concat_{2,3,4}_{2,3,4,5,6}_y_{float,half}
// only support concat_{3,4}_{2,3,4,5,6}_x_{float,half}
// only support concat_{1,2,3,4}_{2,3,4,5,6}_z_{float,half}
// >> normal mode (loop mode)
// ssd-ar: (R=4, N=3, V=z), (R=3, N=2, V=y), (R=2, N=5, V=x), (R=3, N=5, V=x)
// ssd: (R=2, N=6, V=y), (R=3, N=6, V=y)
// genet: (R=4, N=2, V=normal)
// ssd-ar: (R=3, N=5, V=x)
#define V VX
#define R 3
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=2, N=5, V=x)
#define V VX
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=4, N=3, V=z)
#define V VZ
#define R 4
#define N 3
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=2, N=6, V=y)
#define V VY
#define R 2
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=3, N=6, V=y)
#define V VY
#define R 3
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VNORMAL
#define R 4
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
kernel void conv_add_batch_norm_relu_1x1_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
/*---------------------------------------------*/
kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#pragma mark - convAdd
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
float4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
float4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = biase[gid.z];
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
#pragma mark - half
kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
half4 output = biase[gid.z];
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
half4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
half4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void test_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
if (gid.x > 0 || gid.y > 0 || gid.z > 0) { return; }
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include "Macro.metal"
#pragma mark - convAdd
kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample,float2(posInInput.x, posInInput.y), i);
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device P *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#pragma mark - conv bn relu
kernel void conv_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
#pragma mark - half
kernel void conv_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(float4(input), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(float4(input), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(float4(input), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(float4(input), float4(weight_w));
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// conv
#pragma mark -- conv
kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]);
output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]);
output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]);
output.w += float(input.w) * float(weights[weithTo + 3 * kernelHXW + j]);
}
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(float4(input), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(float4(input), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(float4(input), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(float4(input), float4(weight_w));
}
outTexture.write(half4(output), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct MetalConvTransposeParam{
ushort kernelW;
ushort kernelH;
ushort strideX;
ushort strideY;
ushort paddingX;
ushort paddingY;
ushort dilationX;
ushort dilationY;
};
kernel void conv_transpose2x2_stride2(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
int input_array_size = inTexture.get_array_size();
int kernel_index_x = gid.x % 2;
int kernel_index_y = gid.y % 2;
int kernel_index = kernel_index_y * 2 + kernel_index_x;
int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
int input_x = gid.x / 2;
int input_y = gid.y / 2;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output = float4(0.0);
for (int i = 0; i < input_array_size; ++i) {
float4 input = inTexture.sample(sample, float2(input_x, input_y), i);
float4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
float4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
float4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
float4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
output.x += dot(input, kernel_slice0);
output.y += dot(input, kernel_slice1);
output.z += dot(input, kernel_slice2);
output.w += dot(input, kernel_slice3);
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_transpose2x2_stride2_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
int input_array_size = inTexture.get_array_size();
int kernel_index_x = gid.x % 2;
int kernel_index_y = gid.y % 2;
int kernel_index = kernel_index_y * 2 + kernel_index_x;
int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
int input_x = gid.x / 2;
int input_y = gid.y / 2;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output = float4(0.0);
for (int i = 0; i < input_array_size; ++i) {
half4 input = inTexture.sample(sample, float2(input_x, input_y), i);
half4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
half4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
half4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
half4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
output.x += dot(float4(input), float4(kernel_slice0));
output.y += dot(float4(input), float4(kernel_slice1));
output.z += dot(float4(input), float4(kernel_slice2));
output.w += dot(float4(input), float4(kernel_slice3));
}
outTexture.write(half4(output), gid.xy, gid.z);
}
//kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
// texture2d_array<float, access::write> outTexture [[texture(1)]],
// constant MetalConvTransposeParam &param [[buffer(0)]],
// const device float4 *weights [[buffer(1)]],
// uint3 gid [[thread_position_in_grid]]){
// if (gid.x >= outTexture.get_width() ||
// gid.y >= outTexture.get_height() ||
// gid.z >= outTexture.get_array_size()) {
// return;
// }
//
// int input_array_size = inTexture.get_array_size();
//
// uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
//
// uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);
//
// constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
//
// float4 output;
//
// for (int w = 0; w < param.kernelW; ++w) {
// int top = gid.x - w * param.dilationX + param.paddingX;
// int input_x = top / param.strideX;
// if (top < 0 || input_x >= int(inTexture.get_width())) {
// continue;
// }
//
// for (int h = 0; h < param.kernelH; ++h) {
// int top_y = gid.y - h * param.dilationY + param.paddingY;
// int input_y = top_y / param.strideY;
// if (top_y < 0 || input_y >= int(inTexture.get_height())) {
// continue;
// }
//
// uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();
//
// for (int slice = 0; slice < input_array_size; ++slice) {
//
// float4 input;
// float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
// float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
//
// float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
//
// float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];
//
// input = inTexture.sample(sample, float2(input_x, input_y), slice);
// output.x += dot(input, kernel_slice);
// output.y += dot(input, kernel_slice1);
// output.z += dot(input, kernel_slice2);
// output.w += dot(input, kernel_slice3);
// }
// }
// }
//
// outTexture.write(output, gid.xy, gid.z);
//}
//
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
kernel void elementwise_add(texture2d_array<float, access::read> inputX [[texture(0)]],
texture2d_array<float, access::read> inputY [[texture(1)]],
texture2d_array<float, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
float4 rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
float4 r = rx + ry;
outTexture.write(r, gid.xy, gid.z);
}
kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[texture(0)]],
texture2d_array<half, access::read> inputY [[texture(1)]],
texture2d_array<half, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
half4 rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
half4 r = rx + ry;
outTexture.write(r, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
texture2d_array<P, access::read> inputY [[texture(1)]],
texture2d_array<P, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(1)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
VECTOR(P, 4) rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
VECTOR(P, 4) output = rx + ry;
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(output, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void fetch(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
// output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
// output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float * output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
// output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
// output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// 占位函数, 啥也没干
kernel void place_holder(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
}
struct OutputDim {
ushort width;
ushort height;
ushort strideX;
ushort strideY;
};
kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant OutputDim &params [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint2 pos = gid.xy * uint2(params.strideX, params.strideY);
const half4 input = inTexture.read(pos);
outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z);
}
kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const float4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const half4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC3_(a, b, c) CONCAT3_(a, b, c)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void nms_fetch_result(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_result_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input;
}
kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = float4(input);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct PoolParam {
int ksizeX;
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
float4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
half4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
float4 alpha_value = alpha[gid.z];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
float4 alpha_value = alpha[alpha_to + gid.z];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
float alpha_value = alpha[0];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value * input.x);
output.y = input.y > 0 ? input.y : (alpha_value * input.y);
output.z = input.z > 0 ? input.z : (alpha_value * input.z);
output.w = input.w > 0 ? input.w : (alpha_value * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_channel_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
half4 alpha_value = alpha[gid.z];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_element_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
half4 alpha_value = alpha[alpha_to + gid.z];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_other_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
half alpha_value = alpha[0];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value * input.x);
output.y = input.y > 0 ? input.y : (alpha_value * input.y);
output.z = input.z > 0 ? input.z : (alpha_value * input.z);
output.w = input.w > 0 ? input.w : (alpha_value * input.w);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct PriorBoxMetalParam {
float offset;
float stepWidth;
float stepHeight;
float minSize;
float maxSize;
float imageWidth;
float imageHeight;
bool clip;
uint numPriors;
uint aspecRatiosSize;
uint minSizeSize;
uint maxSizeSize;
};
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
texture2d_array<float, access::write> varianceTexture [[texture(2)]],
const device float *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z < param.aspecRatiosSize) {
float ar = aspect_ratios[gid.z];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
} else if (gid.z >= param.aspecRatiosSize) {
if (param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(max_box, gid.xy, gid.z);
}
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(variances_output, gid.xy, gid.z);
}
}
kernel void prior_box_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]],
const device half *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z < param.aspecRatiosSize) {
half ar = aspect_ratios[gid.z];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
} else if (gid.z >= param.aspecRatiosSize) {
if (param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(half4(max_box), gid.xy, gid.z);
}
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(half4(variances_output), gid.xy, gid.z);
}
}
kernel void prior_box_MinMaxAspectRatiosOrder(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
texture2d_array<float, access::write> varianceTexture [[texture(2)]],
const device float *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z == 0) {
box_width = box_height = param.minSize / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
if (gid.z == 1 && param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
int aspect_to = 0;
if (param.maxSizeSize > 0) {
aspect_to = gid.z - 2;
} else {
aspect_to = gid.z - 1;
}
if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) {
int skip = 0;
for (int i = 0; i < aspect_to + 1; ++i) {
if (fabs(aspect_ratios[i] - 1.) < 1e-6) {
skip += 1;
}
}
aspect_to += skip;
float ar = aspect_ratios[aspect_to];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(variances_output, gid.xy, gid.z);
}
}
kernel void prior_box_MinMaxAspectRatiosOrder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]],
const device half *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z == 0) {
box_width = box_height = param.minSize / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
if (gid.z == 1 && param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
int aspect_to = 0;
if (param.maxSizeSize > 0) {
aspect_to = gid.z - 2;
} else {
aspect_to = gid.z - 1;
}
if (aspect_to > 0 && aspect_to < int(param.aspecRatiosSize) && fabs(aspect_ratios[aspect_to] - 1.) > 1e-6) {
float ar = aspect_ratios[aspect_to];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(half4(variances_output), gid.xy, gid.z);
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void relu_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const half4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(half4(relu), gid.xy, gid.z);
}
kernel void relu(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const float4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(float4(relu), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define FUNC(f, r1, r2, p) CONCAT4_(f, r1, r2, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant ReshapeParam &rp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4], iabcd[4];
ReshapeParam lrp = rp;
int oC = lrp.odim[lrp.otrans[3]];
int iC = lrp.idim[lrp.itrans[3]];
int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3];
VECTOR(P, 4) r;
for (int n = 0; n < 4; n++) {
oxyzn[3] = n;
#if ROUT == 4
xyzn2abcd_4(oC, oxyzn, oabcd);
#else
FUNC_R(xyzn2abcd, ROUT)(oxyzn, oabcd);
#endif
int tabcd[4];
invtrans(lrp.otrans, oabcd, tabcd);
int index = abcd2index(lrp.odim, tabcd);
if (index < count) {
index2abcd(lrp.idim, index, tabcd);
trans(lrp.itrans, tabcd, iabcd);
#if RIN == 4
abcd2xyzn_4(iC, iabcd, ixyzn);
#else
FUNC_R(abcd2xyzn, RIN)(iabcd, ixyzn);
#endif
r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
} else {
r[n] = 0;
}
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONRITIONS OF ANY KINR, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ReshapeParam {
int32_t idim[4];
int32_t itrans[4];
int32_t odim[4];
int32_t otrans[4];
};
#define P float
#define RIN 4
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 3
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 2
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 1
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#undef P
#define P half
#define RIN 4
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 3
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 2
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 1
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct resize_bilinear_param {
// int32_t out_h;
// int32_t out_w;
float ratio_h;
float ratio_w;
};
kernel void resize_bilinear(texture2d_array<float, access::read> input [[texture(0)]],
texture2d_array<float, access::write> output [[texture(2)]],
constant resize_bilinear_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
float4 r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
float w = gid.x * pm.ratio_w;
float h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
float w1lambda = w - w0, h1lambda = h - h0;
float w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
float4 r0 = input.read(uint2(w0, h0), gid.z);
float4 r1 = input.read(uint2(w1, h0), gid.z);
float4 r2 = input.read(uint2(w0, h1), gid.z);
float4 r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
kernel void resize_bilinear_half(texture2d_array<half, access::read> input [[texture(0)]],
texture2d_array<half, access::write> output [[texture(2)]],
constant resize_bilinear_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
half4 r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
half w = gid.x * pm.ratio_w;
half h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
half w1lambda = w - w0, h1lambda = h - h0;
half w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
half4 r0 = input.read(uint2(w0, h0), gid.z);
half4 r1 = input.read(uint2(w1, h0), gid.z);
half4 r2 = input.read(uint2(w0, h1), gid.z);
half4 r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
output.write(r, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void shape() {
}
kernel void shape_half() {
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(softmax, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant SoftmaxParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
// int zsize = inTexture.get_array_size();
P maxv = inTexture.read(uint2(0, gid.y), 0)[0];
int group = sp.K / 4;
int remain = sp.K % 4;
for (int x = 0; x < group; x++) {
VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
maxv = max(maxv, max(r[0], max(r[1], max(r[2], r[3]))));
}
if (remain > 0) {
VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
for (int i = 0; i < remain; i++) {
maxv = max(maxv, r[i]);
}
}
VECTOR(P, 4) rsum = {0, 0, 0, 0};
for (int x = 0; x < group; x++) {
VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
rsum += exp(r - maxv);
}
P sum = rsum[0] + rsum[1] + rsum[2] + rsum[3];
if (remain > 0) {
VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
for (int i = 0; i < remain; i++) {
sum += exp(r[i] - maxv);
}
}
VECTOR(P, 4) rr = inTexture.read(gid.xy, gid.z);
rr = exp(rr - maxv) / sum;
outTexture.write(rr, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct SoftmaxParam {
int N;
int K;
};
#define P float
#include "Softmax.inc.metal"
#undef P
#define P half
#include "Softmax.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
#if V == VX
#define VV x
#elif V == VY
#define VV y
#elif V == VZ
#define VV z
#else
#define VV normal
#endif
#if V == VY
kernel void FUNC(split, R, N, VV, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> out1 [[texture(1)]],
texture2d_array<P, access::write> out2 [[texture(2)]],
#if N >= 3
texture2d_array<P, access::write> out3 [[texture(3)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::write> out4 [[texture(4)]],
#endif // N >= 4
constant SplitParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r = input.read(gid.xy, gid.z);
int y = gid.y - sp.offset;
if (y < sp.vdim[0]) {
out1.write(r, gid.xy, gid.z);
return;
}
y -= sp.vdim[0];
if (y < sp.vdim[1]) {
out2.write(r, uint2(gid.x, y), gid.z);
return;
}
#if N >= 3
y -= sp.vdim[1];
if (y < sp.vdim[2]) {
out3.write(r, uint2(gid.x, y), gid.z);
return;
}
#endif // N >= 3
#if N >= 4
y -= sp.vdim[2];
if (y < sp.vdim[3]) {
out4.write(r, uint2(gid.x, y), gid.z);
return;
}
#endif // N >= 4
}
#endif // V == VY
#if V == VX
kernel void FUNC(split, R, N, VV, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> out1 [[texture(1)]],
texture2d_array<P, access::write> out2 [[texture(2)]],
#if N >= 3
texture2d_array<P, access::write> out3 [[texture(3)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::write> out4 [[texture(4)]],
#endif // N >= 4
constant SplitParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r = input.read(gid.xy, gid.z);
int x = gid.x;
if (x < sp.vdim[0]) {
out1.write(r, gid.xy, gid.z);
return;
}
x -= sp.vdim[0];
if (x < sp.vdim[1]) {
out2.write(r, uint2(x, gid.y), gid.z);
return;
}
#if N >= 3
x -= sp.vdim[1];
if (x < sp.vdim[2]) {
out3.write(r, uint2(x, gid.y), gid.z);
return;
}
#endif // N >= 3
#if N >= 4
x -= sp.vdim[2];
if (x < sp.vdim[3]) {
out4.write(r, uint2(x, gid.y), gid.z);
return;
}
#endif // N >= 4
}
#endif // V == VX
#undef VV
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct SplitParam {
int32_t idim[4];
int32_t axis;
int32_t offset;
int32_t trans[4];
int32_t vdim[4];
};
#define VNORMAL 1
#define VX 2
#define VY 3
#define VZ 4
// only support split_{2, 3, 4}_{2, 3, 4}_y_{float, half}
// only support split_{3, 4}_{2, 3, 4}_x_{float, half}
//// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "Split.inc.metal"
#undef P
#define P half
#include "Split.inc.metal"
#undef P
#undef N
#undef R
#undef V
//// ssd-ar: (R=2, N=2, V=y)
#define V VY
#define R 2
#define N 2
#define P float
#include "Split.inc.metal"
#undef P
#define P half
#include "Split.inc.metal"
#undef P
#undef N
#undef R
#undef V
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define FUNC(f, r, p) CONCAT3_(f, r, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(transpose, R, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r;
int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0};
int iabcd[4], oabcd[4], ixyzn[4];
for (int n = 0; n < 4; n++) {
oxyzn[3] = n;
#if R == 4
xyzn2abcd_4(pm.oC, oxyzn, iabcd);
#endif // R == 4
#if R == 3
xyzn2abcd_3(oxyzn, oabcd);
#endif // R == 3
#if R == 2
xyzn2abcd_2(oxyzn, oabcd);
#endif // R == 2
iabcd[pm.axis[0]] = oabcd[0];
iabcd[pm.axis[1]] = oabcd[1];
iabcd[pm.axis[2]] = oabcd[2];
iabcd[pm.axis[3]] = oabcd[3];
#if R == 4
abcd2xyzn_4(pm.iC, iabcd, ixyzn);
#endif // R == 4
#if R == 3
abcd2xyzn_3(iabcd, ixyzn);
#endif // R == 3
#if R == 2
abcd2xyzn_2(iabcd, ixyzn);
#endif // R == 2
r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct TransposeParam {
int iC;
int oC;
int axis[4];
};
kernel void transpose_copy_float(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
}
kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
}
#define R 4
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
#define R 3
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
#define R 2
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class MulticlassNMSParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope)
bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope)
middleOutput = FetchHolder.init(inCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim.dims)
bboxOutput = FetchHolder.init(inCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim.dims)
} catch let error {
throw error
}
}
var bboxOutput: FetchHolder
var middleOutput: FetchHolder
let scores: Texture<P>
let bboxes: Texture<P>
var output: Texture<P>
}
class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{
func inputVariant() -> [String : [Variant]] {
return ["Scores" : [para.middleOutput], "BBoxes" : [para.bboxOutput]]
}
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let _ {
fatalError()
}
}
func inferShape() {
// para.output.dim = para.input.dim
}
typealias OpType = MulticlassNMSOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
}
func delogOutput() {
print(" nms - output: ")
print(para.bboxes.metalTexture.float32Array().strideArray())
}
}
...@@ -15,54 +15,60 @@ ...@@ -15,54 +15,60 @@
import Foundation import Foundation
class PoolParam<P: PrecisionType>: OpParam { class PoolParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope) input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs) poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs)
ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs) ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs)
stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs) stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs)
padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs) padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs)
ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs) ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs)
globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs) globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs)
} catch let error { assert(input.transpose == [0, 2, 3, 1])
throw error } catch let error {
} throw error
// let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self)
} }
let input: Texture<P> // let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self)
var output: Texture<P> }
var ksize: [Int32] let input: Texture<P>
var stride: [Int32] var output: Texture<P>
var padding: [Int32] var ksize: [Int32]
var poolType: String var stride: [Int32]
var ceilMode: Bool var padding: [Int32]
var globalPooling: Bool var poolType: String
var ceilMode: Bool
var globalPooling: Bool
} }
class PoolOp<P: PrecisionType>: Operator<PoolKernel<P>, PoolParam<P>>, Runable, Creator, InferShaperable{ class PoolOp<P: PrecisionType>: Operator<PoolKernel<P>, PoolParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { typealias OpType = PoolOp<P>
// para.output.dim = para.input.dim
} func inferShape() {
// para.output.dim = para.input.dim
typealias OpType = PoolOp<P> }
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
try kernel.compute(commandBuffer: buffer, param: para) do {
} catch let error { try kernel.compute(commandBuffer: buffer, param: para)
throw error } catch let error {
} throw error
} }
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
func delogOutput() { // print("pool2d delog")
print("pool2d delog") // let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true)
let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true) // print(para.ksize)
print(para.ksize) // print(para.stride)
print(para.stride) // print(para.padding)
print(para.padding) // print(para.poolType)
print(para.poolType) // let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true)
let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true) }
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class PreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
alpha = try PreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try PreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
} catch let error {
throw error
}
}
let mode: String
let alpha: Tensor<P>
let input: Texture<P>
var output: Texture<P>
}
class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = PreluOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) input: ")
print(para.input.metalTexture.toTensor(dim: (n: para.input.padToFourDim[0], c: para.input.padToFourDim[1], h: para.input.padToFourDim[2], w: para.input.padToFourDim[3])).strideArray())
print(" \(type) Alpha: ")
let _: Float32? = para.alpha.buffer.logDesc(header: " alpha: ", stridable: false)
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray())
}
// print("softmax delog")
// let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
// let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class PriorBoxParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs)
} catch _ {
}
do {
input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope)
output = try PriorBoxParam.outputBoxes(outputs: opDesc.outputs, from: inScope)
inputImage = try PriorBoxParam.inputImage(inputs: opDesc.inputs, from: inScope)
outputVariances = try PriorBoxParam.outputVariances(outputs: opDesc.outputs, from: inScope)
minSizes = try PriorBoxParam.getAttr(key: "min_sizes", attrs: opDesc.attrs)
maxSizes = try PriorBoxParam.getAttr(key: "max_sizes", attrs: opDesc.attrs)
aspectRatios = try PriorBoxParam.getAttr(key: "aspect_ratios", attrs: opDesc.attrs)
variances = try PriorBoxParam.getAttr(key: "variances", attrs: opDesc.attrs)
flip = try PriorBoxParam.getAttr(key: "flip", attrs: opDesc.attrs)
clip = try PriorBoxParam.getAttr(key: "clip", attrs: opDesc.attrs)
stepW = try PriorBoxParam.getAttr(key: "step_w", attrs: opDesc.attrs)
stepH = try PriorBoxParam.getAttr(key: "step_h", attrs: opDesc.attrs)
offset = try PriorBoxParam.getAttr(key: "offset", attrs: opDesc.attrs)
} catch let error {
throw error
}
}
var min_max_aspect_ratios_order: Bool = false
let minSizes: [Float32]
let maxSizes: [Float32]
let aspectRatios: [Float32]
var newAspectRatios: MTLBuffer?
let variances: [Float32]
let flip: Bool
let clip: Bool
var stepW: Float32
var stepH: Float32
let offset: Float32
let input: Texture<P>
let inputImage: Texture<P>
var output: Texture<P>
let outputVariances: Texture<P>
}
class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = PriorBoxOp<P>
func inferShape() {
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
// output
// let outputArray = para.output.metalTexture.float32Array()
// print(outputArray.strideArray())
// let device = para.input.metalTexture!.device
// let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3])
// let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3])
// print("boxes: ")
// print(boxes.strideArray())
// print("variances: ")
// print(variances.strideArray())
// output
print(" \(type) output: ")
let box = para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3]))
print(" dim: \(para.output.dim)")
print(box.strideArray())
// print((0..<box.count).map { (index: $0, value: box[$0])})
// print(para.output.realNHWC().strideArray())
// let padToFourDim = para.output.padToFourDim
// if para.output.transpose == [0, 1, 2, 3] {
// let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]), texturePrecision: computePrecision)
// print(outputArray.strideArray())
// } else if para.output.transpose == [0, 2, 3, 1] {
// print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3]), texturePrecision: computePrecision).strideArray())
// } else {
// print(" not implement")
// }
// writeToLibrary(fileName: "box_out", array: outputArray)
// output variance
// let outputVarianceArray = para.outputVariances.metalTexture.floatArray { (o: Float32) -> Float32 in
// return o
// }
//
// print(" output variance: \(outputVarianceArray)")
// writeToLibrary(fileName: "variance_out", array: outputVarianceArray)
}
}
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
// You may obtain a copy of the License at You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and See the License for the specific language governing permissions and
// limitations under the License. */ limitations under the License. */
import Foundation import Foundation
class ReluParam<P: PrecisionType>: OpParam { class ReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope) input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error { } catch let error {
throw error throw error
}
} }
let input: Texture<P> }
var output: Texture<P> let input: Texture<P>
var output: Texture<P>
} }
class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable{ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { typealias OpType = ReluOp<P>
para.output.dim = para.input.dim
} func inferShape() {
para.output.dim = para.input.dim
typealias OpType = ReluOp<P> }
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
try kernel.compute(commandBuffer: buffer, param: para) do {
} catch let error { try kernel.compute(commandBuffer: buffer, param: para)
throw error } catch let error {
} throw error
} }
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
}
} }
......
...@@ -15,36 +15,63 @@ ...@@ -15,36 +15,63 @@
import Foundation import Foundation
class ReshapeParam<P: PrecisionType>: OpParam { class ReshapeParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope) input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error { shape = try ReshapeParam.getAttr(key: "shape", attrs: opDesc.attrs)
throw error
var s: [Int] = shape.map { Int($0) }
var di = -1
var ml = 1
for i in 0..<s.count {
if s[i] == -1 {
di = i
continue
} }
ml *= s[i]
}
if di >= 0 {
s[di] = input.dim.numel() / ml
}
output.tensorDim = Dim.init(inDim: s)
var dim: [Int] = [1, 1, 1, 1]
for i in 0..<s.count {
dim[4-s.count+i] = s[i]
}
output.padToFourDim = Dim.init(inDim: dim)
output.dim = output.padToFourDim
} catch let error {
throw error
} }
let input: Texture<P> }
var output: Texture<P> let input: Texture<P>
let shape: [Int32]
var output: Texture<P>
} }
class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>, Runable, Creator, InferShaperable{ class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { typealias OpType = ReshapeOp<P>
// para.output.dim = para.input.dim
} func inferShape() {
// para.output.dim = para.input.dim
typealias OpType = ReshapeOp<P> }
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
try kernel.compute(commandBuffer: buffer, param: para) do {
} catch let error { try kernel.compute(commandBuffer: buffer, param: para)
throw error } catch let error {
} throw error
}
func delogOutput() {
print("reshape delog")
let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
} }
}
func delogOutput() {
print("reshape delog")
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
// print(outputArray)
}
} }
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
class ResizeBilinearParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope)
// if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) {
// fatalError()
// }
output = try ResizeBilinearParam.outputOut(outputs: opDesc.outputs, from: inScope)
out_h = try ResizeBilinearParam.getAttr(key: "out_h", attrs: opDesc.attrs)
out_w = try ResizeBilinearParam.getAttr(key: "out_w", attrs: opDesc.attrs)
} catch let error {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let out_h: Int32
let out_w: Int32
}
class ResizeBilinearOp<P: PrecisionType>: Operator<ResizeBilinearKernel<P>, ResizeBilinearParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = ResizeBilinearOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ShapeParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope)
output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
}
}
var output: Texture<P>
let input: Texture<P>
}
class ShapeOp<P: PrecisionType>: Operator<ShapeKernel<P>, ShapeParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = ShapeOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
}
}
...@@ -15,36 +15,48 @@ ...@@ -15,36 +15,48 @@
import Foundation import Foundation
class SoftmaxParam<P: PrecisionType>: OpParam { class SoftmaxParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error assert(input.tensorDim.dims.count == 2)
} assert(input.transpose == [0, 1, 2, 3])
output.dim = input.dim
output.tensorDim = input.tensorDim
output.padToFourDim = input.padToFourDim
} catch let error {
throw error
} }
let input: Texture<P> }
var output: Texture<P> let input: Texture<P>
var output: Texture<P>
} }
class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable{ class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = SoftmaxOp<P>
func inferShape() {
// para.output.dim = para.input.dim func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
} }
}
func delogOutput() {
print("softmax delog")
print(para.input)
typealias OpType = SoftmaxOp<P> print(para.output)
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { let padToFourDim = para.output.padToFourDim
do { let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
try kernel.compute(commandBuffer: buffer, param: para) print(outputArray.strideArray())
} catch let error { }
throw error
}
}
func delogOutput() {
print("softmax delog")
let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class SplitParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope)
output = Texture<P>.init(device: input.metalTexture!.device, inDim: input.dim)
axis = try SplitParam.getAttr(key: "axis", attrs: opDesc.attrs)
sections = try SplitParam.getAttr(key: "sections", attrs: opDesc.attrs)
if axis < 0 {
axis = input.tensorDim.cout() + axis
}
guard let outlist = opDesc.outputs["Out"] else {
fatalError()
}
for out in outlist {
guard let variant = inScope[out], let v = variant as? Texture<P> else {
fatalError()
}
outputList.append(v)
sections.append(Int32(v.tensorDim.dims[axis]))
}
} catch let error {
throw error
}
}
var axis: Int
let input: Texture<P>
var output: Texture<P>
var outputList: [Texture<P>] = []
var sections: [Int32] = []
}
class SplitOp<P: PrecisionType>: Operator<SplitKernel<P>, SplitParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = SplitOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
let device = para.input.metalTexture!.device
for out in para.outputList {
let arr: [Float32] = device.texture2tensor(texture: out.metalTexture, dim: out.tensorDim.dims, transpose: out.transpose)
print(arr.strideArray())
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class TransposeParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try TransposeParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let axis: [Int32]
}
class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam<P>>, Runable, Creator, InferShaperable{
typealias OpType = TransposeOp<P>
func inferShape() {
//para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import MetalKit
import Foundation
@objc public enum Platform: Int{
case CPU, GPU
}
class ScaleKernel: CusomKernel {
init(device: MTLDevice, shape: Shape) {
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
} else {
fatalError(" unsupport ")
}
}
}
public class Runner: NSObject {
var program: Program?
var executor: Executor<Float32>?
var queue: MTLCommandQueue?
var textureLoader: MTKTextureLoader?
public let net: Net
let device: MTLDevice?
let platform: Platform
var cpuPaddleMobile: PaddleMobileCPU?
let numel: Int
let meansNumber: [NSNumber]
// dims num nchw
let dimsNum: [NSNumber]
/**
* inNet: 需要运行的网络
* commandQueue: GPU 是需要传入
* inPlatform: 需要使用的平台, GPU or CPU
*/
@objc public init(inNet: Net, commandQueue: MTLCommandQueue?, inPlatform: Platform) {
net = inNet
queue = commandQueue
device = queue?.device
platform = inPlatform
if let inDevice = device {
textureLoader = MTKTextureLoader.init(device: inDevice)
}
if platform == .CPU {
cpuPaddleMobile = PaddleMobileCPU.init()
}
numel = net.dim.n * net.dim.c * net.dim.h * net.dim.w
meansNumber = net.means.map { NSNumber.init(value: $0) }
dimsNum = [NSNumber.init(value: net.dim.n),
NSNumber.init(value: net.dim.c),
NSNumber.init(value: net.dim.h),
NSNumber.init(value: net.dim.w)]
}
/**
* load 模型, 返回 true 可进行预测
*/
@objc public func load() -> Bool {
if platform == .GPU {
guard let inDevice = device, let inQueue = queue else {
print(" paddle mobile gpu load error, need MTLCommandQueue")
return false
}
let loader = Loader<Float32>.init()
do {
program = try loader.load(device: inDevice, paramPointer: net.paramPointer!, paramSize: net.paramSize,modePointer:net.modelPointer!,modelSize:net.modelSize)
// program = try loader.load(device: inDevice, modelPath: net.modelPath, paraPath: net.paramPath)
net.updateProgram(program: program!)
executor = try Executor<Float32>.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!)
} catch let error {
print(error)
return false
}
} else {
return cpuPaddleMobile?.load(net.modelPath, andWeightsPath: net.paramPath) ?? false
}
return true
}
@objc public func predict(inputPointer: UnsafeMutablePointer<Float32>, completion: @escaping ( _ success: Bool, _ result: PaddleMobileCPUResult?) -> Void) {
guard let res = cpuPaddleMobile?.predictInput(inputPointer, dim: dimsNum) else {
completion(false, nil)
return
}
completion(true, res)
}
/**
* GPU 版本 predict
* texture: 需要预测的 texture 需要做过预处理
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
@objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) -> Void) {
do {
try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in
guard let SSelf = self else {
fatalError( " self nil " )
}
let result = SSelf.net.fetchResult(paddleMobileRes: res)
completion(true, result)
}, preProcessKernle: self.net.preprocessKernel, except: self.net.except)
} catch let error {
print(error)
completion(false, nil)
return
}
}
/**
* CPU GPU 通用版本 predict
* cgImage: 需要预测的图片
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
// @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) {
// if platform == .GPU {
// getTexture(image: cgImage) { [weak self] (texture) in
// guard let SSelf = self else {
// fatalError( "" )
// }
// SSelf.predict(texture: texture, completion: completion)
// }
// } else if platform == .CPU {
// let input = preproccess(image: cgImage)
// predict(inputPointer: input, completion: completion)
// input.deinitialize(count: numel)
// input.deallocate()
// }
// }
/*
* 清理内存, 调用此函数后, 不能再使用, 需重新 load
*/
@objc public func clear() {
if platform == .GPU {
executor?.clear()
executor = nil
program = nil
} else if platform == .CPU {
cpuPaddleMobile?.clear()
}
}
@objc public func preproccess(image: CGImage) -> UnsafeMutablePointer<Float> {
let output = UnsafeMutablePointer<Float>.allocate(capacity: numel)
let means = net.means.map { NSNumber.init(value: $0) }
let dims = [NSNumber.init(value: net.dim.n),
NSNumber.init(value: net.dim.c),
NSNumber.init(value: net.dim.h),
NSNumber.init(value: net.dim.w)]
cpuPaddleMobile?.preprocess(image, output: output, means: means, scale: net.scale, dim: dims)
return output
}
/*
* 获取 texture, 对 texture 进行预处理, GPU 预测时使用
*/
@objc public func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
scaleTexture(input: texture!, complete: getTexture)
}
public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) {
guard let inQueue = queue, let inDevice = device else {
fatalError( " queue or devcie nil " )
}
guard let buffer = inQueue.makeCommandBuffer() else {
fatalError( " make buffer error" )
}
let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3))
do {
try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer)
} catch let error {
print(error)
fatalError()
}
buffer.addCompletedHandler { (buffer) in
complete(scaleKernel.outputTexture)
}
buffer.commit()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import <Metal/Metal.h>
#import <Foundation/Foundation.h>
typedef enum : NSUInteger {
MobileNetType,
MobileNetSSDType,
GenetType,
} NetType;
@interface PaddleMobileGPUResult: NSObject
@property (assign, nonatomic) float *output;
@property (assign, nonatomic) int outputSize;
-(void)releaseOutput;
@end
@interface ModelConfig: NSObject
/*
* 预处理需要用到的值 (三个)
*/
@property (strong, nonatomic) NSArray<NSNumber *> *means;
/*
* 预处理需要用到的 scale 值
*/
@property (assign, nonatomic) float scale;
/*
* 输出维度信息 [n c h w]
*/
@property (strong, nonatomic) NSArray<NSNumber *> *dims;
/*
* 模型参数内存地址
*/
@property (assign, nonatomic) void *paramPointer;
/*
* 模型参数占用内存大小 (kb)
*/
@property (assign, nonatomic) int paramSize;
/*
* 模型内存地址
*/
@property (assign, nonatomic) void *modelPointer;
/*
* 模型占用内存大小 (kb)
*/
@property (assign, nonatomic) int modelSize;
@end
@interface PaddleMobileGPU: NSObject
/*
* 初始化
*/
-(instancetype)initWithCommandQueue:(id<MTLCommandQueue>)queue net:(NetType)netType modelConfig:(ModelConfig *)config;
/*
* paramPointer 模型参数内存地址
* paramSize 模型参数占用内存大小 (kb)
* modelPointer 模型内存地址
* modelSize 模型占用内存大小 (kb)
*/
-(BOOL)load;
/*
* texture: 需要进行预测的图像转换的 texture
* completion: 预测完成回调
*/
-(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSNumber *> *))completion;
/*
* texture: 需要进行预测的图像转换的 texture
* completion: 预测完成回调
*/
-(void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion;
/*
* 清理内存
*/
-(void)clear;
@end
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import <Foundation/Foundation.h>
#import "PaddleMobileGPU.h"
#import "paddle_mobile.h"
#import <paddle_mobile/paddle_mobile-Swift.h>
@implementation ModelConfig
@end
@interface PaddleMobileGPUResult ()
@property (strong, nonatomic) ResultHolder *resultHolder;
- (void)setOutputResult:(ResultHolder *)resultHolder;
@end
@implementation PaddleMobileGPUResult
- (void)setOutputResult:(ResultHolder *)resultHolder {
self.resultHolder = resultHolder;
self.output = resultHolder.result;
self.outputSize = resultHolder.capacity;
}
-(void)releaseOutput {
[self.resultHolder releasePointer];
}
@end
@interface PaddleMobileGPU ()
{
Runner *runner;
}
@end
@implementation PaddleMobileGPU
-(instancetype)initWithCommandQueue:(id<MTLCommandQueue>)queue net:(NetType)netType modelConfig:(ModelConfig *)config {
self = [super init];
if (self) {
Net *net = nil;
if (netType == GenetType) {
net = [[Genet alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize];
} else if (netType == MobileNetSSDType) {
net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize];
} else if (netType == MobileNetType) {
}
runner = [[Runner alloc] initInNet:net commandQueue:queue inPlatform:PlatformGPU];
}
return self;
}
-(BOOL)load {
return [runner load];
}
-(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSNumber *> *))completion {
[runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) {
NSMutableArray<NSNumber *> *resultArray = [NSMutableArray arrayWithCapacity:result.capacity];
for (int i = 0; i < result.capacity; ++i) {
[resultArray addObject:[NSNumber numberWithFloat:result.result[i]]];
}
completion(success, resultArray);
[result releasePointer];
}];
}
-(void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion {
[runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) {
PaddleMobileGPUResult *gpuResult = [[PaddleMobileGPUResult alloc] init];
[gpuResult setOutputResult:result];
completion(success, gpuResult);
}];
}
-(void)clear {
[runner clear];
}
@end
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import Foundation import Foundation
struct BlockDesc { class BlockDesc {
let index: Int let index: Int
let parentIndex: Int let parentIndex: Int
let vars: [VarDesc] let vars: [VarDesc]
...@@ -48,8 +48,10 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { ...@@ -48,8 +48,10 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String { var description: String {
var str = "" var str = ""
for op in ops { for i in 0..<ops.count {
str += op.description str += " op \(i): "
let op = ops[i]
str += op.description
} }
for varDesc in vars { for varDesc in vars {
......
...@@ -14,68 +14,68 @@ ...@@ -14,68 +14,68 @@
import Foundation import Foundation
struct OpDesc { class OpDesc {
let inputs: [String : [String]] let inputs: [String : [String]]
var paraInputs: [String : [String]] var paraInputs: [String : [String]]
var outputs: [String : [String]] var outputs: [String : [String]]
let unusedOutputs: [String : [String]] let unusedOutputs: [String : [String]]
var attrs: [String : Attr] = [:] var attrs: [String : Attr] = [:]
var type: String var type: String
init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) { init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) {
type = protoOpDesc.type type = protoOpDesc.type
let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in
var map: [String : [String]] = [:] var map: [String : [String]] = [:]
for opDescVar in vars { for opDescVar in vars {
if (canAdd(opDescVar.parameter)) { if (canAdd(opDescVar.parameter)) {
map[opDescVar.parameter] = opDescVar.arguments map[opDescVar.parameter] = opDescVar.arguments
}
}
return map
}
inputs = creator(protoOpDesc.inputs) {
opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false
}
paraInputs = creator(protoOpDesc.inputs) {
!(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false)
}
outputs = creator(protoOpDesc.outputs) {
opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false
}
unusedOutputs = creator(protoOpDesc.outputs) {
!(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false)
}
for attr in protoOpDesc.attrs {
if (attr.type != .block) {
attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
}
} }
}
return map
} }
}
inputs = creator(protoOpDesc.inputs) {
extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible { opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false
var description: String {
var str = ""
str += "op type: \(type): \n"
str += " op inputs: \n"
str += " \(inputs) \n"
str += " op para inputs: \n"
str += " \(paraInputs) \n"
str += " op para outputs: \n"
str += " \(outputs) \n"
str += " op attrs: \n"
str += " \(attrs) \n"
return str
} }
var debugDescription: String { paraInputs = creator(protoOpDesc.inputs) {
return description !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false)
} }
outputs = creator(protoOpDesc.outputs) {
opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false
}
unusedOutputs = creator(protoOpDesc.outputs) {
!(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false)
}
for attr in protoOpDesc.attrs {
if (attr.type != .block) {
attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
}
}
}
}
extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
var str = ""
str += "op type: \(type): \n"
str += " op inputs: \n"
str += " \(inputs) \n"
str += " op para inputs: \n"
str += " \(paraInputs) \n"
str += " op para outputs: \n"
str += " \(outputs) \n"
str += " op attrs: \n"
str += " \(attrs) \n"
return str
}
var debugDescription: String {
return description
}
} }
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import Foundation import Foundation
public struct Program { public class Program {
let paramPath: String let paramPath: String
let programDesc: ProgramDesc let programDesc: ProgramDesc
let scope: Scope let scope: Scope
...@@ -23,4 +23,9 @@ public struct Program { ...@@ -23,4 +23,9 @@ public struct Program {
paramPath = inParamPath paramPath = inParamPath
scope = inScope scope = inScope
} }
init(inProgramDesc: ProgramDesc, inScope: Scope) {
programDesc = inProgramDesc
scope = inScope
paramPath = ""
}
} }
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import Foundation import Foundation
public struct ProgramDesc { public class ProgramDesc {
var blocks: [BlockDesc] = [] var blocks: [BlockDesc] = []
init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) { init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) {
for block in protoProgram.blocks { for block in protoProgram.blocks {
......
...@@ -15,204 +15,285 @@ ...@@ -15,204 +15,285 @@
import Foundation import Foundation
precedencegroup ChainNode { precedencegroup ChainNode {
associativity: left associativity: left
higherThan: MultiplicationPrecedence higherThan: MultiplicationPrecedence
} }
infix operator --> : ChainNode infix operator --> : ChainNode
class Node { class Node {
var inputs: [Node] = [] var inputs: [Node] = []
var outputs: [Node] = [] var outputs: [Node] = []
var type: String var type: String
var opDesc: OpDesc? var opDesc: OpDesc?
init(inOpDesc: OpDesc) { init(inOpDesc: OpDesc) {
type = inOpDesc.type type = inOpDesc.type
opDesc = inOpDesc opDesc = inOpDesc
}
init(inType: String) {
type = inType
}
subscript(index: Int) -> [Node] {
var nodes: [Node] = []
getNodesWithLocation(index: index, nowIndex: 0, nodes: &nodes)
return nodes
}
func getNodesWithLocation(index: Int, nowIndex: Int, nodes: inout [Node]) {
if index == nowIndex {
nodes.append(self)
} }
init(inType: String) { for output in outputs {
type = inType output.getNodesWithLocation(index: index, nowIndex: nowIndex + 1, nodes: &nodes)
}
}
static func -->(lNode: Node, rNode: Node) -> Node {
lNode.outputs.append(rNode)
rNode.inputs.append(lNode)
return rNode
}
func depth(begin: UInt = 1) -> UInt {
var beginMax: UInt = 1
for output in outputs {
let subDepth = output.depth(begin: begin + 1)
beginMax = max(begin, subDepth)
}
beginMax = max(begin, beginMax)
return beginMax
}
func to(depth: UInt) -> Node {
let beginNode = Node.init(inType: type)
beginNode.opDesc = opDesc
to(depth: depth - 1, withNode: beginNode)
return beginNode
}
func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) {
let fusionNode = fusion.fusionNode()
let change = fusion.change()
let inOutputs = outputs
outputs.removeAll()
opDesc?.outputs.removeAll()
for i in 0..<inOutputs.count {
inOutputs[i].folderWith(beginNode: self, matchNode: fusionNode.outputs[i], change: change, removedNodes: &removedNodes)
}
opDesc?.type = fusion.fusionType()
type = fusion.fusionType()
}
private func folderWith(beginNode: Node, matchNode: Node, change: [String : [(from: String, to: String)]], removedNodes: inout [Node]) {
guard let inOpdesc = opDesc else {
fatalError()
} }
static func -->(lNode: Node, rNode: Node) -> Node { for attr in inOpdesc.attrs {
lNode.outputs.append(rNode) beginNode.opDesc?.attrs[attr.key] = attr.value
rNode.inputs.append(lNode) // print(beginNode.opDesc?.attrs)
return rNode
} }
func depth(begin: UInt = 1) -> UInt { for paraInput in inOpdesc.paraInputs {
var beginMax: UInt = 1 if let inChanges = change[type] {
for output in outputs { for keyChange in inChanges {
let subDepth = output.depth(begin: begin + 1) if keyChange.from == paraInput.key {
beginMax = max(begin, subDepth) beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value
} else {
beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
} }
beginMax = max(begin, beginMax) } else {
return beginMax beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
} }
func to(depth: UInt) -> Node { if matchNode.outputs.count == 0 {
let beginNode = Node.init(inType: type) beginNode.outputs.append(contentsOf: outputs)
to(depth: depth - 1, withNode: beginNode) beginNode.opDesc?.outputs = inOpdesc.outputs
return beginNode
} }
removedNodes.append(self)
func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) { for i in 0..<matchNode.outputs.count {
let fusionNode = fusion.fusionNode() outputs[i].folderWith(beginNode: beginNode, matchNode: matchNode.outputs[i], change: change, removedNodes: &removedNodes)
let change = fusion.change()
let inOutputs = outputs
outputs.removeAll()
opDesc?.outputs.removeAll()
for i in 0..<inOutputs.count {
inOutputs[i].folderWith(beginNode: self, matchNode: fusionNode.outputs[i], change: change, removedNodes: &removedNodes)
}
opDesc?.type = fusion.fusionType()
type = fusion.fusionType()
} }
private func folderWith(beginNode: Node, matchNode: Node, change: [String : [(from: String, to: String)]], removedNodes: inout [Node]) { }
guard let inOpdesc = opDesc else {
fatalError() private func to(depth: UInt, withNode: Node) {
} if depth < 1 {
return
for attr in inOpdesc.attrs {
beginNode.opDesc?.attrs[attr.key] = attr.value
// print(beginNode.opDesc?.attrs)
}
for paraInput in inOpdesc.paraInputs {
if let inChanges = change[type] {
for keyChange in inChanges {
if keyChange.from == paraInput.key {
beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value
} else {
beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
}
} else {
beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
}
if matchNode.outputs.count == 0 {
beginNode.outputs.append(contentsOf: outputs)
beginNode.opDesc?.outputs = inOpdesc.outputs
}
removedNodes.append(self)
for i in 0..<matchNode.outputs.count {
outputs[i].folderWith(beginNode: beginNode, matchNode: matchNode.outputs[i], change: change, removedNodes: &removedNodes)
}
} }
private func to(depth: UInt, withNode: Node) { for output in outputs {
if depth < 1 { let node = Node.init(inType: output.type)
return node.opDesc = output.opDesc
} withNode.outputs.append(node)
output.to(depth: depth - 1, withNode: node)
for output in outputs { }
let node = Node.init(inType: output.type) }
withNode.outputs.append(node)
output.to(depth: depth - 1, withNode: node) func relationship() -> [String : Node]{
} var map: [String : Node] = [:]
relationship(map: &map)
return map
}
private func relationship(map: inout [String : Node]) {
guard let inOpDesc = opDesc else {
return
} }
for output in inOpDesc.outputs {
for outputKey in output.value {
map[outputKey] = self
}
}
for output in outputs {
output.relationship(map: &map)
}
}
} }
extension Node: Equatable { extension Node: Equatable {
static func == (lhs: Node, rhs: Node) -> Bool { static func == (lhs: Node, rhs: Node) -> Bool {
if lhs.outputs.count != rhs.outputs.count { if lhs.outputs.count != rhs.outputs.count {
return false return false
}
if lhs.type != rhs.type {
return false
}
for i in 0..<lhs.outputs.count {
if lhs.outputs[i] != rhs.outputs[i] {
return false
}
}
return true
} }
if lhs.type != rhs.type {
return false
}
for i in 0..<lhs.outputs.count {
if lhs.outputs[i] != rhs.outputs[i] {
return false
}
}
return true
}
} }
class ProgramOptimize<P: PrecisionType> { class ProgramOptimize<P: PrecisionType> {
let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self, ConvAddOp<P>.self] // register fusion
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self,
// ConvAddAddPreluOp<P>.self,
guard originProgramDesc.blocks.count == 1 else { ConvAddPreluOp<P>.self,
fatalError(" not support yet") ConvAddOp<P>.self,
ConvBNReluOp<P>.self,
DwConvBNReluOp<P>.self,
ElementwiseAddPreluOp<P>.self
]
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
guard originProgramDesc.blocks.count == 1 else {
fatalError(" not support yet")
}
var mapForNodeChain: [String : Node] = [:]
var nodes: [Node] = []
var typeMapNodes: [String : [(node: Node, output: [String : Node])]] = [:]
let block = originProgramDesc.blocks[0]
for opDesc in block.ops {
guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else {
fatalError()
}
let node = Node.init(inOpDesc: opDesc)
for inputKey in opInputKeys {
if let inputs = opDesc.inputs[inputKey] {
for input in inputs {
if let inputNode = mapForNodeChain[input] {
_ = inputNode --> node
}
}
} }
}
var mapForNodeChain: [String : Node] = [:]
var nodes: [Node] = [] for outputKey in outputKeys {
var typeMapNodes: [String : [Node]] = [:] if let outputs = opDesc.outputs[outputKey] {
let block = originProgramDesc.blocks[0] for output in outputs {
for opDesc in block.ops { mapForNodeChain[output] = node
guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else { }
fatalError() }
} }
let node = Node.init(inOpDesc: opDesc) nodes.append(node)
for inputKey in opInputKeys {
if let inputs = opDesc.inputs[inputKey] { if var inNodes = typeMapNodes[opDesc.type] {
for input in inputs { inNodes.append((node, mapForNodeChain))
if let inputNode = mapForNodeChain[input] { typeMapNodes[opDesc.type] = inNodes
_ = inputNode --> node } else {
} typeMapNodes[opDesc.type] = [(node, mapForNodeChain)]
} }
}
for fusion in fusionOps {
let fusionNode = fusion.fusionNode()
let depth = fusionNode.depth()
if let toMatchNodes = typeMapNodes[fusionNode.type] {
for node in toMatchNodes {
let toNode = node.node.to(depth: depth)
if toNode == fusionNode { // match
var canFolder = true
let relationshipMap = toNode.relationship()
for toCheck in fusion.needCheck() {
// let nodes = toCheck
let checkNodes = toNode[toCheck.0]
for checkNode in checkNodes {
let inputToChecks = checkNode.opDesc?.inputs[toCheck.1] ?? []
for inputToCheck in inputToChecks {
if node.output[inputToCheck] == nil {
if relationshipMap[inputToCheck] == nil {
canFolder = false
} }
}
} }
for outputKey in outputKeys { let paramInputToChecks = checkNode.opDesc?.paraInputs[toCheck.1] ?? []
if let outputs = opDesc.outputs[outputKey] { for paramInputToCheck in paramInputToChecks {
for output in outputs { if node.output[paramInputToCheck] == nil {
mapForNodeChain[output] = node if relationshipMap[paramInputToCheck] == nil {
} canFolder = false
} }
}
} }
}
nodes.append(node)
if var inNodes = typeMapNodes[opDesc.type] {
inNodes.append(node)
typeMapNodes[opDesc.type] = inNodes
} else {
typeMapNodes[opDesc.type] = [node]
}
} }
for fusion in fusionOps { if !canFolder {
let fusionNode = fusion.fusionNode() continue
let depth = fusionNode.depth()
if let toMatchNodes = typeMapNodes[fusionNode.type] {
for node in toMatchNodes {
let toNode = node.to(depth: depth)
if toNode == fusionNode { // match
var removeNodes: [Node] = []
node.folderWith(fusion: fusion, removedNodes: &removeNodes)
for removeNode in removeNodes {
nodes.remove(element: removeNode)
}
}
}
}
} }
var ops: [OpDesc] = [] var removeNodes: [Node] = []
for node in nodes { node.node.folderWith(fusion: fusion, removedNodes: &removeNodes)
ops.append(node.opDesc!) for removeNode in removeNodes {
nodes.remove(element: removeNode)
}
}
} }
}
var newProgramDesc = ProgramDesc.init()
let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops)
newProgramDesc.blocks.append(newBlock)
return newProgramDesc
} }
var ops: [OpDesc] = []
for node in nodes {
ops.append(node.opDesc!)
}
var newProgramDesc = ProgramDesc.init()
let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops)
newProgramDesc.blocks.append(newBlock)
return newProgramDesc
}
} }
...@@ -14,18 +14,18 @@ ...@@ -14,18 +14,18 @@
import Foundation import Foundation
struct TensorDesc { class TensorDesc {
let dims: [Int] let dims: [Int]
let dataType: VarTypeType let dataType: VarTypeType
let dataLayout: DataLayout = .NCHW let dataLayout: DataLayout = DataLayout.NCHW()
var NCHWDim: [Int] { var NCHWDim: [Int] {
get { get {
if dims.count != 4 { if dims.count != 4 {
return dims return dims
} }
if dataLayout == .NCHW { if dataLayout == DataLayout.NCHW() {
return dims return dims
} else if dataLayout == .NHWC{ } else if dataLayout == DataLayout.NHWC() {
var resultDims = dims var resultDims = dims
resultDims.swapAt(1, 3) resultDims.swapAt(1, 3)
return resultDims return resultDims
...@@ -40,9 +40,9 @@ struct TensorDesc { ...@@ -40,9 +40,9 @@ struct TensorDesc {
if dims.count != 4 { if dims.count != 4 {
return dims return dims
} }
if dataLayout == .NHWC { if dataLayout == DataLayout.NHWC() {
return dims return dims
} else if dataLayout == .NCHW{ } else if dataLayout == DataLayout.NCHW() {
var resultDims = dims var resultDims = dims
resultDims.swapAt(1, 3) resultDims.swapAt(1, 3)
return resultDims return resultDims
...@@ -53,7 +53,7 @@ struct TensorDesc { ...@@ -53,7 +53,7 @@ struct TensorDesc {
} }
init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) { init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) {
dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : 1 } dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) }
dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType
} }
......
...@@ -56,7 +56,7 @@ enum VarTypeType: Int { ...@@ -56,7 +56,7 @@ enum VarTypeType: Int {
} }
} }
struct VarDesc { class VarDesc {
let name: String let name: String
let persistable: Bool let persistable: Bool
let type: VarTypeType let type: VarTypeType
......
...@@ -31,15 +31,14 @@ public struct Dim { ...@@ -31,15 +31,14 @@ public struct Dim {
return dims.reduce(1) { $0 * $1 } return dims.reduce(1) { $0 * $1 }
} }
static func ==(left: Dim, right: Dim) -> Bool { public static func ==(left: Dim, right: Dim) -> Bool {
return left.dims == right.dims; return left.dims == right.dims;
} }
subscript(index: Int) -> Int { public subscript(index: Int) -> Int {
return dims[index]; return dims[index];
} }
private(set) var dims: [Int] private(set) var dims: [Int]
private init(){ private init(){
fatalError() fatalError()
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
let testTo = 81
var isTest = false
let computePrecision: ComputePrecision = .Float16
public class GPUResultHolder {
public let dim: [Int]
public let capacity: Int
public var resultPointer: UnsafeMutablePointer<Float32>?
public var intermediateResults: [String : [Variant]]?
public let elapsedTime: Double
public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) {
dim = inDim
capacity = inCapacity
if let inInPointer = inPointer {
resultPointer = UnsafeMutablePointer<Float32>.allocate(capacity: inCapacity)
resultPointer?.initialize(from: inInPointer, count: inCapacity)
}
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults
}
}
extension GPUResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
public var debugDescription: String {
// var str = ""
// str += "Dim: \(dim) \n value:[ "
// if resultArr.count < 20 {
// for d in resultArr {
// str += " \(d) "
// }
// } else {
// for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
// str += " \(resultArr[d]) "
// }
// }
// str += " ]"
// return str
fatalError()
}
public var description: String {
return debugDescription
}
}
public class Executor<P: PrecisionType> {
var ops: [Runable & InferShaperable] = []
let program: Program
let device: MTLDevice
let inflightSemaphore: DispatchSemaphore
let queue: MTLCommandQueue
public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws {
self.inflightSemaphore = DispatchSemaphore(value: 3)
program = inProgram
device = inDevice
queue = inQueue
// print("before for ")
//print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
for block in inProgram.programDesc.blocks {
//block.ops.count
for i in 0..<block.ops.count {
let opDesc = block.ops[i]
do {
// print("in for i \(i): ")
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
//
// if i == 56 {
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
//
// }
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope)
ops.append(op)
} catch let error {
throw error
}
}
}
}
public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws {
guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
}
inflightSemaphore.wait()
let resInput: MTLTexture
if let inPre = preProcessKernle {
do {
try inPre.compute(inputTexuture: input, commandBuffer: buffer)
resInput = inPre.outputTexture
} catch let error {
throw error
}
} else {
resInput = input
}
let beforeDate = Date.init()
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: dim))
program.scope.setInput(input: inputTexture)
//(ops.count - except)
for i in 0..<(ops.count - except) {
let op = ops[i]
do {
try op.run(device: device, buffer: buffer)
} catch let error {
throw error
}
}
var outputTextures: [String : [Variant]]?
if except > 0 {
ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer)
outputTextures = ops[ops.count - except].inputVariant()
}
buffer.addCompletedHandler { [weak self] (commandbuffer) in
// let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
// print(inputArr.strideArray())
//
//// print(dim)
// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
// print(" write done ")
// print("write to library done")
// return
// print(inputArr)
//
// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray()
// print(stridableInput)
//
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
// for i in 0..<self!.ops.count {
// let op = self!.ops[i]
// print(" 第 \(i) 个 op: ")
// op.delogOutput()
// }
// return;
// self!.ops[testTo - 2].delogOutput()
// self!.ops[testTo - 1].delogOutput()
// self!.ops[5].delogOutput()
// return
guard let SSelf = self else {
// return
fatalError()
}
let afterDate = Date.init()
var resultHolder: GPUResultHolder
if except > 0 {
resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures)
} else {
let outputVar: Variant = SSelf.program.scope.output()!
let output: FetchHolder = outputVar as! FetchHolder
// let beforeToTensorDate = Date.init()
resultHolder = GPUResultHolder.init(inDim: output.dim, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate))
// let timeToTensor = Date.init().timeIntervalSince(beforeToTensorDate)
// print(timeToTensor)
}
completionHandle(resultHolder)
SSelf.inflightSemaphore.signal()
}
buffer.commit()
}
public func clear() {
program.scope.clear()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import SwiftProtobuf
public class Loader<P: PrecisionType> {
class ParaLoader {
let file: UnsafeMutablePointer<FILE>
let fileSize: Int
var nowIndex: Int
init(paramPath: String) throws {
guard let tmpFile = fopen(paramPath, "rb") else {
throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
}
file = tmpFile
fseek(file, 0, SEEK_END)
fileSize = ftell(file)
guard fileSize > 0 else {
throw PaddleMobileError.loaderError(message: "param file size is too small")
}
rewind(file)
nowIndex = 0
}
func read(tensor: Tensor<P>) throws {
guard nowIndex <= fileSize else {
throw PaddleMobileError.loaderError(message: "out of the file range")
}
func pointerReader<T>(type: T.Type) -> T {
let ptr = UnsafeMutablePointer<T>.allocate(capacity: MemoryLayout<T>.size)
fread(ptr, 1, MemoryLayout<T>.size, file)
nowIndex += MemoryLayout<T>.size
let pointee = ptr.pointee
ptr.deinitialize(count: MemoryLayout<UInt32>.size)
ptr.deallocate()
return pointee
}
let _ = pointerReader(type: UInt32.self)
let lodLevel = pointerReader(type: UInt64.self)
for _ in 0..<lodLevel {
let size = pointerReader(type: UInt64.self)
for _ in 0..<Int(size/UInt64(MemoryLayout<size_t>.size)){
_ = pointerReader(type: size_t.self)
}
}
let _ = pointerReader(type: UInt32.self)
let tensorDescSize = pointerReader(type: Int32.self)
fseek(file, Int(tensorDescSize), SEEK_CUR)
nowIndex += Int(tensorDescSize)
/*
这里没有根据 Data Type 去判断, 而是从外部泛型直接指定了精度
*/
//现在模型传入模型为 Float 类型, 这块应该根据模型来
// let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
// let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
guard bytesRead == tensor.data.size else {
throw PaddleMobileError.loaderError(message: "param read size error")
}
// TODO: use script to convert
// let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
// for i in 0..<tensor.numel() {
// tensor.data[i] = P.init(inFloat: tmpPointer[i])
// }
// tmpPointer.deinitialize(count: tmpCapacity)
// tmpPointer.deallocate()
nowIndex += bytesRead
}
deinit {
fclose(file)
}
}
class ParaLoaderWithPointer {
var paramPointer: UnsafeMutableRawPointer
let paramSize: Int
var nowIndex: Int
init(pPointer: UnsafeMutableRawPointer,pSize:Int) throws {
paramPointer = UnsafeMutableRawPointer.init(pPointer)
paramSize = pSize
nowIndex = 0
}
func read(tensor: Tensor<P>) throws {
guard nowIndex <= paramSize else {
throw PaddleMobileError.loaderError(message: "out of the file range")
}
var readerIndex: Int = 0
func pointerReader<T>(type: T.Type) -> T {
let ptr = UnsafeMutablePointer<T>.allocate(capacity: MemoryLayout<T>.size)
memcpy(ptr, paramPointer.advanced(by: Int(readerIndex)), MemoryLayout<T>.size)
nowIndex += MemoryLayout<T>.size
readerIndex += MemoryLayout<T>.size
let pointee = ptr.pointee
ptr.deinitialize(count: MemoryLayout<UInt32>.size)
ptr.deallocate()
return pointee
}
let _ = pointerReader(type: UInt32.self)
let lodLevel = pointerReader(type: UInt64.self)
for _ in 0..<lodLevel {
let size = pointerReader(type: UInt64.self)
for _ in 0..<Int(size/UInt64(MemoryLayout<size_t>.size)){
_ = pointerReader(type: size_t.self)
}
}
let _ = pointerReader(type: UInt32.self)
let tensorDescSize = pointerReader(type: Int32.self)
paramPointer = paramPointer.advanced(by: Int(readerIndex))
paramPointer = paramPointer.advanced(by: Int(tensorDescSize))
nowIndex += Int(tensorDescSize)
let _ = memcpy(tensor.data.pointer, paramPointer, tensor.data.size)
paramPointer = paramPointer.advanced(by: Int(tensor.data.size))
nowIndex += tensor.data.size
}
deinit {
}
}
public init(){}
func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program {
do {
let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
serializedData: modelData)
let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
print(programDesc)
guard programDesc.blocks.count > 0 else {
throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0")
}
// to get feed key and fetch key
let block = programDesc.blocks[0]
guard let firstOp = block.ops.first, let lastOp = block.ops.last else {
throw PaddleMobileError.loaderError(message: "at least two operator")
}
guard firstOp.type == gFeedType, lastOp.type == gFetchType else {
throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch")
}
guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else {
throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found")
}
guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else {
throw PaddleMobileError.loaderError(message: "feed key or fetch key not found")
}
let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey)
// to load memory
for block in programDesc.blocks {
for varDesc in block.vars {
if (varDesc.type == .LodTensor) {
guard let tensorDesc = varDesc.tensorDesc else {
throw PaddleMobileError.loaderError(message: "get tensor desc failed")
}
if (varDesc.persistable
&& varDesc.type != .FeedMiniBatch
&& varDesc.type != .FetchList) {
let dimArr = tensorDesc.dims
guard dimArr.count > 0 else {
throw PaddleMobileError.loaderError(message: "tensor desc dim size error")
}
let dim = Dim.init(inDim: dimArr)
let tensor = Tensor<P>.init(inDim: dim, inLayout: tensorDesc.dataLayout)
do {
if paraLoaderPointer != nil {
try paraLoaderPointer!.read(tensor: tensor)
}
if paraLoader != nil {
try paraLoader!.read(tensor: tensor)
}
} catch let error {
throw error
}
// tensor.convert(to: DataLayout.NHWC())
// tensor.initBuffer(device: device)
scope[varDesc.name] = tensor
} else {
let dim = Dim.init(inDim: tensorDesc.dims)
scope[varDesc.name] = Texture<P>.init(device: device, inDim: dim)
}
} else {
if varDesc.name == fetchKey {
// scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inCapacity: <#Int#>, inElapsedTime: 0.0)
} else if varDesc.name == feedKey {
}
}
}
}
let program = Program.init(inProgramDesc: programDesc, inScope: scope)
return program
} catch _ {
throw PaddleMobileError.loaderError(message: "protobuf decoder error")
}
}
public func load(device:MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) throws -> Program {
let modelData = Data.init(bytes:modePointer, count:modelSize)
guard let paraLoader = try? ParaLoaderWithPointer.init(pPointer: paramPointer,pSize: paramSize) else {
throw PaddleMobileError.loaderError(message: "load para error")
}
do {
let program = try loadModelandParam(device,modelData,paraLoader,nil)
return program
} catch let error {
throw error
}
}
public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{
guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else {
throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !")
}
guard let paraLoader = try? ParaLoader.init(paramPath: paraPath) else {
throw PaddleMobileError.loaderError(message: "load para error")
}
do {
let program = try loadModelandParam(device,modelData,nil,paraLoader)
return program
} catch let error {
throw error
}
}
}
...@@ -12,251 +12,308 @@ ...@@ -12,251 +12,308 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
import Accelerate
import Foundation import Foundation
protocol Tensorial: CustomStringConvertible, CustomDebugStringConvertible{ protocol Tensorial: CustomStringConvertible, CustomDebugStringConvertible{
var dim: Dim { get set } var dim: Dim { get set }
func numel() -> Int func numel() -> Int
var layout: DataLayout { get } var layout: DataLayout { get }
} }
extension Tensorial { extension Tensorial {
func numel() -> Int { func numel() -> Int {
return dim.numel() return dim.numel()
} }
}
public enum ComputePrecision {
case Float32, Float16
} }
class Tensor<P: PrecisionType>: Tensorial { class Tensor<P: PrecisionType>: Tensorial {
enum BufferPrecision {
case Float32, Float16 var data: Data
var dim: Dim
var buffer: MTLBuffer!
private(set) var layout: DataLayout
class Data {
init(inSize: Int, inPointer: UnsafeMutablePointer<P>) {
size = inSize
pointer = inPointer
}
let size: Int
var pointer: UnsafeMutablePointer<P>
subscript(index: Int) -> P{
get {
return pointer[index]
}
set {
pointer[index] = newValue
}
}
func release() {
pointer.deinitialize(count: size)
pointer.deallocate()
}
deinit {
// release()
}
}
required init(inDim: Dim, inLayout: DataLayout = DataLayout.NCHW()) {
dim = inDim
let size = inDim.numel() * MemoryLayout<P>.size
let pointer = UnsafeMutablePointer<P>.allocate(capacity: size)
data = Data.init(inSize: size, inPointer: pointer)
layout = inLayout
}
func convert(to: DataLayout) {
guard to != layout else {
return
} }
var data: Data guard dim.cout() == 4 else {
var dim: Dim return
var buffer: MTLBuffer! }
private(set) var layout: DataLayout
class Data { guard layout == DataLayout.NCHW() && to == DataLayout.NHWC() else {
init(inSize: Int, inPointer: UnsafeMutablePointer<P>) { // other not support
size = inSize return
pointer = inPointer
}
let size: Int
var pointer: UnsafeMutablePointer<P>
subscript(index: Int) -> P{
get {
return pointer[index]
}
set {
pointer[index] = newValue
}
}
func release() {
pointer.deinitialize(count: size)
pointer.deallocate()
}
deinit {
// release()
}
} }
let newPointer = UnsafeMutablePointer<P>.allocate(capacity: data.size)
required init(inDim: Dim, inLayout: DataLayout = .NCHW) {
dim = inDim if layout == DataLayout.NCHW() {
let size = inDim.numel() * MemoryLayout<P>.size NCHW2NHWC(newPtr: newPointer)
let pointer = UnsafeMutablePointer<P>.allocate(capacity: size)
data = Data.init(inSize: size, inPointer: pointer)
layout = inLayout
} }
func convert(to: DataLayout) { data.release()
guard to != layout else { data.pointer = newPointer
return layout = to
} }
guard dim.cout() == 4 else {
return
} func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, convertToNHWC: Bool = true, withTranspose: Bool = false) {
if convertToNHWC {
guard layout == .NCHW && to == .NHWC else { // print(layout)
// other not support convert(to: DataLayout.NHWC())
return
}
let newPointer = UnsafeMutablePointer<P>.allocate(capacity: data.size)
if layout == .NCHW {
NCHW2NHWC(newPtr: newPointer)
}
data.release()
data.pointer = newPointer
layout = to
} }
func float32ToFloat16(input: UnsafeMutablePointer<Float32>, output: UnsafeMutableRawPointer, count: Int) { if withTranspose {
var float32Buffer = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 4) let transposePointer = UnsafeMutablePointer<P>.allocate(capacity: numel())
var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2) let n = dim[0]
guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else { let hwc = numel()/n
fatalError(" float 32 to float 16 error ! ") for j in 0..<hwc {
for i in 0..<n {
//data[i * hwc + j]
transposePointer[j * n + i] = data[i * hwc + j]
} }
}
dim.swapeDimAt(index1: 0, index2: 3)
data.release()
data.pointer = transposePointer
} }
func initBuffer(device: MTLDevice, precision: BufferPrecision = .Float32) { guard let floatPointer = data.pointer as? UnsafeMutablePointer<Float32> else {
guard let floatPointer = data.pointer as? UnsafeMutablePointer<Float32> else { fatalError(" not support yet ")
fatalError(" not support yet ")
}
let precisionSize: Int
switch precision {
case .Float32:
precisionSize = 4
case .Float16:
precisionSize = 2
}
if dim.cout() == 4 {
if layout == .NHWC {
let C = dim[3]
let cSlices = (C + 3) / 4
let paddedC = cSlices * 4
let count = paddedC * dim[0] * dim[1] * dim[2]
if C == paddedC {
buffer = device.makeBuffer(length: count * precisionSize)
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count)
}
} else if C == 1 {
buffer = device.makeBuffer(length: numel() * precisionSize)
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel())
}
} else {
buffer = device.makeBuffer(length: count * precisionSize)
let convertedPointer = UnsafeMutablePointer<Float32>.allocate(capacity: count)
var tmpPointer = floatPointer
var dstPtr = convertedPointer
for _ in 0..<dim[0] * dim[1] * dim[2] {
for j in 0..<paddedC {
if j < C {
dstPtr[j] = tmpPointer[j]
}
}
tmpPointer += C
dstPtr += paddedC
}
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: convertedPointer, byteCount: count * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count)
}
convertedPointer.deinitialize(count: count)
convertedPointer.deallocate()
}
}
} else if dim.cout() == 1 {
buffer = device.makeBuffer(length: numel() * precisionSize)
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel())
}
} else {
fatalError(" not support !")
}
//TODO: release
data.release()
} }
var width: Int { let precisionSize: Int
get { switch precision {
if dim.cout() == 4 { case .Float32:
return dim[1] precisionSize = 4
} else { case .Float16:
fatalError() precisionSize = 2
}
}
} }
var height: Int { if dim.cout() == 4 {
get { if layout == DataLayout.NHWC() {
if dim.cout() == 4 { let C = dim[3]
return dim[2] let cSlices = (C + 3) / 4
} else { let paddedC = cSlices * 4
fatalError() let count = paddedC * dim[0] * dim[1] * dim[2]
if C == paddedC {
buffer = device.makeBuffer(length: count * precisionSize)
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count)
}
} else if C == 1 {
buffer = device.makeBuffer(length: numel() * precisionSize)
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel())
}
} else {
buffer = device.makeBuffer(length: count * precisionSize)
let convertedPointer = UnsafeMutablePointer<Float32>.allocate(capacity: count)
var tmpPointer = floatPointer
var dstPtr = convertedPointer
for _ in 0..<dim[0] * dim[1] * dim[2] {
for j in 0..<paddedC {
if j < C {
dstPtr[j] = tmpPointer[j]
} else {
dstPtr[j] = 0
}
} }
tmpPointer += C
dstPtr += paddedC
}
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: convertedPointer, byteCount: count * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count)
}
convertedPointer.deinitialize(count: count)
convertedPointer.deallocate()
} }
} } else {
let C = dim[3]
var channel: Int { let cSlices = (C + 3) / 4
get { let paddedC = cSlices * 4
if dim.cout() == 4 { let count = paddedC * dim[0] * dim[1] * dim[2]
return dim[3] if C == paddedC {
} else { buffer = device.makeBuffer(length: count * precisionSize)
fatalError() switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count)
}
} else if C == 1 {
fatalError(" not support ")
} else {
buffer = device.makeBuffer(length: count * precisionSize)
let convertedPointer = UnsafeMutablePointer<Float32>.allocate(capacity: count)
var tmpPointer = floatPointer
var dstPtr = convertedPointer
for _ in 0..<dim[0] * dim[1] * dim[2] {
for j in 0..<paddedC {
if j < C {
dstPtr[j] = tmpPointer[j]
} else {
dstPtr[j] = 0
}
} }
tmpPointer += C
dstPtr += paddedC
}
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: convertedPointer, byteCount: count * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count)
}
convertedPointer.deinitialize(count: count)
convertedPointer.deallocate()
} }
}
} else if dim.cout() == 1 {
let num = ((numel() + 3) / 4) * 4
buffer = device.makeBuffer(length: num * precisionSize)
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: num * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: num)
}
} else {
fatalError(" not support !")
} }
//TODO: release
data.release()
}
var width: Int {
get {
if dim.cout() == 4 {
return dim[1]
} else {
fatalError()
}
}
}
var height: Int {
get {
if dim.cout() == 4 {
return dim[2]
} else {
fatalError()
}
}
}
var channel: Int {
get {
if dim.cout() == 4 {
return dim[3]
} else {
fatalError()
}
}
}
func NCHW2NHWC(newPtr: UnsafeMutablePointer<P>) {
let N = dim[0]
let C = dim[1]
let H = dim[2]
let W = dim[3]
let HXW = H * W
let CXHXW = C * H * W
func NCHW2NHWC(newPtr: UnsafeMutablePointer<P>) { var index: Int = 0
let N = dim[0] for n in 0..<N {
let C = dim[1] for h in 0..<H{
let H = dim[2] for w in 0..<W{
let W = dim[3] for c in 0..<C{
let HXW = H * W newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * W + w]
let CXHXW = C * H * W index += 1
}
var index: Int = 0
for n in 0..<N {
for h in 0..<H{
for w in 0..<W{
for c in 0..<C{
newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * W + w]
index += 1
}
}
}
} }
dim.swapeDimAt(index1: 1, index2: 3) }
} }
dim.swapeDimAt(index1: 1, index2: 3)
}
} }
extension Tensor { extension Tensor {
var debugDescription: String { var debugDescription: String {
var str = "dim: \(dim) \n" var str = "dim: \(dim) \n"
str += "MTLBuffer: \(self.buffer) \n" str += "MTLBuffer: \(self.buffer) \n"
for i in 0..<buffer.length/MemoryLayout<P>.size { for i in 0..<buffer.length/MemoryLayout<P>.size {
str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])"
}
return str
} }
return str
func logDataPointer(header: String = "") { }
print(header)
var str = "" func logDataPointer(header: String = "") {
str += "data size: \(data.size) \n" print(header)
str += "dim: \(dim) \n" var str = ""
for i in 0..<numel() { str += "data size: \(data.size) \n"
str += " \(data.pointer[i])" str += "dim: \(dim) \n"
} for i in 0..<numel() {
print(str) str += " \(data.pointer[i])"
} }
print(str)
var description: String { }
return debugDescription
} var description: String {
return debugDescription
}
} }
...@@ -16,127 +16,163 @@ import Metal ...@@ -16,127 +16,163 @@ import Metal
import Foundation import Foundation
class InputTexture { class InputTexture {
let mtlTexture: MTLTexture let mtlTexture: MTLTexture
let expectDim: Dim let expectDim: Dim
init(inMTLTexture: MTLTexture, inExpectDim: Dim) { init(inMTLTexture: MTLTexture, inExpectDim: Dim) {
mtlTexture = inMTLTexture mtlTexture = inMTLTexture
expectDim = inExpectDim expectDim = inExpectDim
} }
} }
extension InputTexture { extension InputTexture {
var description: String { var description: String {
get{ get{
return mtlTexture.description return mtlTexture.description
}
} }
}
var debugDescription: String {
get { var debugDescription: String {
return mtlTexture.debugDescription ?? " MetalTexture " get {
} return mtlTexture.debugDescription ?? " MetalTexture "
} }
}
} }
public class Texture<P: PrecisionType>: Tensorial {
var dim: Dim
let textureDesc: MTLTextureDescriptor
var metalTexture: MTLTexture
init(device: MTLDevice, inDim: Dim, inLayout: DataLayout = .NHWC) {
dim = inDim
layout = inLayout
let tmpTextureDes = MTLTextureDescriptor.init()
if inDim.cout() == 1 {
tmpTextureDes.width = inDim[0]
tmpTextureDes.textureType = .type1D
} else if inDim.cout() == 4 {
tmpTextureDes.height = inDim[1]
tmpTextureDes.width = inDim[2]
// print("n : \(inDim[0])")
// print(inDim[3] * inDim[0])
tmpTextureDes.depth = 1
tmpTextureDes.arrayLength = (inDim[3] * inDim[0] + 3)/4
tmpTextureDes.textureType = .type2DArray
} else if inDim.cout() == 2 {
tmpTextureDes.height = 1
tmpTextureDes.width = 1
tmpTextureDes.depth = 1
tmpTextureDes.arrayLength = (inDim[0] * inDim[1] + 3)/4
tmpTextureDes.textureType = .type2DArray
} else {
fatalError(" not suuprt ")
}
if MemoryLayout<P>.size == 1 {
tmpTextureDes.pixelFormat = .rgba8Unorm
} else if MemoryLayout<P>.size == 2 {
tmpTextureDes.pixelFormat = .rgba16Float
} else if MemoryLayout<P>.size == 4 {
// tmpTextureDes.pixelFormat = .r32Float
tmpTextureDes.pixelFormat = .rgba32Float
} /*
// tmpTextureDes.pixelFormat = .rgba16Float 4 维 tensor 存储 texture,要考虑 transpose
transpose 之后的维度是 [a, b, c, d],对应的texture_2darray
.width = c
.height = b
.len = a * d + 3 / 4
低于 4 维的 tensor,transpose 必须为 [0, 1, 2, 3] 既不考虑 transpose
// TODO transpose 对于低维 tensor 的扩展原则。。。
// [a, b] -> [1, 1, a, b] transpose 必须为 [0, 1, x, x]
// [a] -> [1, 1, 1, a] transpose 必须为 [0, 1, 2, 3]
// [a, b, c] -> [1, a, b, c] tranpose 必须为 [0, x, x, x]
3 维 tensor [a, b, c] 对应的 texture_2darray,
.width = c
.height = b
.len = a + 3 / 4
2 维 tensor [a, b] 对应的 texture_2darray
.width = b + 3 / 4
.height = a
.len = 1
1 维 tensor [a] 对应的 texture_2darray
.width = a + 3 / 4
.height = 1
.len = 1
*/
tmpTextureDes.usage = [.shaderRead, .shaderWrite] public class Texture<P: PrecisionType>: Tensorial {
tmpTextureDes.storageMode = .shared var dim: Dim
textureDesc = tmpTextureDes public var tensorDim: Dim
metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! " texture nil " public var padToFourDim: Dim
private var textureDesc: MTLTextureDescriptor!
public var metalTexture: MTLTexture!
var transpose: [Int] = [0, 1, 2, 3]
func toTensor() -> [Float32] {
guard padToFourDim.cout() == 4 else {
fatalError("- not support -")
} }
return metalTexture.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
}
func realNHWC() -> [Float32] {
guard padToFourDim.cout() == 4 else {
fatalError(" - not support - ")
}
return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
}
func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) {
transpose = inTranspose
for i in 0..<(4 - tensorDim.cout()) {
if i != inTranspose[i] {
fatalError()
}
}
let newDim = transpose.map { padToFourDim[$0] }
// required public init(inDim: Dim, inLayout: DataLayout = .NHWC, inTexture: MTLTexture) { let newLayout = transpose.map { layout.layoutWithDim[$0] }
// dim = inDim
// layout = inLayout
// metalTexture = inTexture
// let tmpTextureDes = MTLTextureDescriptor.init()
//
// if inDim.cout() == 1 {
// tmpTextureDes.width = inDim[0]
// tmpTextureDes.textureType = .type1D
// } else if inDim.cout() == 2 {
// tmpTextureDes.height = inDim[0]
// tmpTextureDes.width = inDim[1]
// tmpTextureDes.textureType = .type2D
// } else if inDim.cout() == 3 {
// fatalError(" not support texture dim 3")
// } else if inDim.cout() == 4 {
// tmpTextureDes.height = inDim[1]
// tmpTextureDes.width = inDim[2]
// tmpTextureDes.depth = inDim[3] * inDim[1]
// tmpTextureDes.textureType = .type2DArray
// }
//
// tmpTextureDes.pixelFormat = .r32Float
// tmpTextureDes.storageMode = .shared
// textureDesc = tmpTextureDes
// let device = MTLCreateSystemDefaultDevice()
// metalTexture = device!.makeTexture(descriptor: tmpTextureDes)!
// }
// init() { layout = DataLayout.init(newLayout)
// dim = Dim.init(inDim: []) dim = Dim.init(inDim: newDim)
// layout = .NCHW
// let device = MTLCreateSystemDefaultDevice()
// textureDesc = MTLTextureDescriptor.init()
// metalTexture = device!.makeTexture(descriptor: textureDesc)!
// }
private(set) var layout: DataLayout let tmpTextureDes = MTLTextureDescriptor.init()
} tmpTextureDes.textureType = .type2DArray
tmpTextureDes.depth = 1
extension Texture {
public var description: String {
return debugDescription
}
public var debugDescription: String{ switch tensorDim.cout() {
var str = "" case 4:
str += "Dim: \(dim) \n value:[ " tmpTextureDes.width = newDim[2]
str += "\(metalTexture)" tmpTextureDes.height = newDim[1]
str += " ]" tmpTextureDes.arrayLength = ((newDim[0]) * (newDim[3]) + 3) / 4
return str case 3:
tmpTextureDes.width = newDim[3]
tmpTextureDes.height = newDim[2]
tmpTextureDes.arrayLength = (newDim[1] + 3) / 4
case 2, 1:
tmpTextureDes.width = (newDim[3] + 3) / 4
tmpTextureDes.height = newDim[2]
tmpTextureDes.arrayLength = 1
default:
fatalError("unreachable")
}
if computePrecision == .Float16 {
tmpTextureDes.pixelFormat = .rgba16Float
} else if computePrecision == .Float32 {
tmpTextureDes.pixelFormat = .rgba32Float
} }
tmpTextureDes.usage = [.shaderRead, .shaderWrite]
tmpTextureDes.storageMode = .shared
textureDesc = tmpTextureDes
metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! " texture nil "
}
init(device: MTLDevice, inDim: Dim) {
var fourDim: Dim
if inDim.cout() == 4 {
fourDim = inDim
} else if inDim.cout() < 4 {
var fourDimNum: [Int] = []
for _ in 0..<(4 - inDim.cout()) {
fourDimNum.append(1)
}
fourDimNum.append(contentsOf: inDim.dims)
fourDim = Dim.init(inDim: fourDimNum)
} else {
fatalError(" not support ")
}
tensorDim = inDim
dim = fourDim
padToFourDim = fourDim
layout = DataLayout.init([(.N, fourDim[0]), (.C, fourDim[1]), (.H, fourDim[2]), (.W, fourDim[3])])
}
private(set) var layout: DataLayout
}
extension Texture {
public var description: String {
return debugDescription
}
public var debugDescription: String{
var str = ""
str += "Dim: \(dim) \n value:[ "
str += "\(metalTexture)"
str += " ]"
return str
}
} }
...@@ -14,12 +14,15 @@ ...@@ -14,12 +14,15 @@
#pragma once #pragma once
#import "PaddleMobileCPU.h"
#import "CPUCompute.h"
#import "PaddleMobileGPU.h"
#import <UIKit/UIKit.h> #import <UIKit/UIKit.h>
//! Project version number for paddle_mobile. //! Project version number for paddle_mobile.
FOUNDATION_EXPORT double paddle_mobileVersionNumber; //FOUNDATION_EXPORT double paddle_mobileVersionNumber;
//! Project version string for paddle_mobile. //! Project version string for paddle_mobile.
FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[]; //FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[];
...@@ -311,6 +311,8 @@ int get_aligned_filter_num(int num) { ...@@ -311,6 +311,8 @@ int get_aligned_filter_num(int num) {
void format_filter(framework::Tensor *filter_tensor, float max_value, void format_filter(framework::Tensor *filter_tensor, float max_value,
int group_num) { int group_num) {
filter_tensor->scale[0] = float(max_value / 127.0);
filter_tensor->scale[1] = float(127.0 / max_value);
auto dims = filter_tensor->dims(); auto dims = filter_tensor->dims();
auto num = dims[0], channel = dims[1], height = dims[2], width = dims[3]; auto num = dims[0], channel = dims[1], height = dims[2], width = dims[3];
auto data_ptr = filter_tensor->data<float>(); auto data_ptr = filter_tensor->data<float>();
......
...@@ -676,11 +676,11 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) { ...@@ -676,11 +676,11 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
to_predict_program_->Block(0); to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()]; auto &ops = ops_of_block_[*to_predict_block.get()];
PADDLE_MOBILE_ENFORCE(id < ops.size(), "Index out of range"); PADDLE_MOBILE_ENFORCE(id < (int)ops.size(), "Index out of range");
auto last_op = id < 0 ? ops[ops.size() - 1] : ops[id]; auto op = id < 0 ? ops[ops.size() - 1] : ops[id];
auto output_map = last_op->Outputs(); auto output_map = op->Outputs();
std::vector<std::string> out_keys = last_op->GetOutKeys(); std::vector<std::string> out_keys = op->GetOutKeys();
PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output"); PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "this op contains no output");
auto *output_tensor = framework::GetVarValue<framework::LoDTensor>( auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
out_keys[0], output_map, *(program_.scope)); out_keys[0], output_map, *(program_.scope));
return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor)); return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
......
...@@ -50,8 +50,8 @@ class FeedOp : public framework::OperatorBase<DeviceType> { ...@@ -50,8 +50,8 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
void RunImpl() const { void RunImpl() const {
auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX()); auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX());
auto input_ptr = input->data<float>();
fpga::format_image(input); fpga::format_image(input);
auto input_ptr = input->data<float>();
Tensor *output = param_.Out(); Tensor *output = param_.Out();
auto output_ptr = output->data<float>(); auto output_ptr = output->data<float>();
......
...@@ -47,7 +47,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) { ...@@ -47,7 +47,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
concatArgs.image_num = (uint32_t)image_num; concatArgs.image_num = (uint32_t)image_num;
concatArgs.images_in = images_in; concatArgs.images_in = images_in;
concatArgs.scales_in = scales_in; concatArgs.scales_in = scales_in;
concatArgs.image_out = (half *)out->mutable_data<float>(); concatArgs.image_out = (half *)out->data<float>();
concatArgs.scale_out = out->scale; concatArgs.scale_out = out->scale;
concatArgs.channel_num = channel_num; concatArgs.channel_num = channel_num;
concatArgs.height = (uint32_t)height; concatArgs.height = (uint32_t)height;
......
...@@ -39,8 +39,8 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -39,8 +39,8 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
args.image.height = 1; args.image.height = 1;
args.image.width = 1; args.image.width = 1;
args.image.channels = (uint32_t)input->dims()[1]; args.image.channels = (uint32_t)input->dims()[1];
args.output.address = float_input->mutable_data<float>(); args.output.address = float_input->data<float>();
args.output.scale_address = float_input->scale;
param->SetFloatInput(float_input); param->SetFloatInput(float_input);
param->SetFpgaArgs(args); param->SetFpgaArgs(args);
return true; return true;
......
...@@ -21,8 +21,15 @@ int main() { ...@@ -21,8 +21,15 @@ int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../../../test/models/googlenet // ../../../test/models/googlenet
// ../../../test/models/mobilenet // ../../../test/models/mobilenet
<<<<<<< HEAD
auto program = loader.Load(g_mobilenet_ssd, false, false);
// auto program = loader.Load(g_googlenet_combine + "/model",
// g_googlenet_combine +
// "/params", true);
=======
// auto program = loader.Load(g_googlenet, true); // auto program = loader.Load(g_googlenet, true);
// auto program = loader.Load(g_mobilenet_ssd, true); // auto program = loader.Load(g_mobilenet_ssd, true);
>>>>>>> e60ab7ae5a43b9cc788813877fbfffc67c87b5f3
auto program = loader.Load(std::string(g_ocr) + "/model", auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params", false); std::string(g_ocr) + "/params", false);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册