diff --git a/.gitignore b/.gitignore index a7b444b5881c8b993c6edbb4a7ba555359dcab39..8f92118b08bb30531869c28d32d335cc47116350 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ *.lai *.la *.lib +*.a # Executables *.exe @@ -70,7 +71,10 @@ build cmake-build-debug cmake-build-release -#ios demo +# ios +tools/libomp.a + +# ios demo demo/ios/PaddleMobileDemo/PaddleMobileDemo/googlenet_combine/ demo/ios/PaddleMobileDemo/PaddleMobileDemo/*.jpg demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/*.a @@ -84,6 +88,7 @@ SwiftProtobuf.framework paddle-mobile.xcworkspace metal/models/ metal/images/ - - -tools/libomp.a \ No newline at end of file +*.a +metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a +*.xcuserdatad/ +*/xcuserdata/ diff --git a/README.md b/README.md index fd5222655821e36fe194225a4d71a3b60b8a89d5..de7dd530c94b4a3055cbf07a4a19a55c21457ed0 100644 --- a/README.md +++ b/README.md @@ -69,8 +69,18 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平 - **苹果设备的GPU Metal实现** - 基于Metal实现的苹果设备的GPU预测库,也已经在实现中,近期也会有相应可运行版本。 - +|mobilenetfssd|速度| +|------------|-----| +|A9(ms)|33.78| +|A10(ms)|24.05| +|A11(ms)|17.15| +||| +|genet|速度| +|A9(ms) |3.49| +|A10(ms)|2.54| +|A11(ms)|1.43| + + - **FPGA** FPGA实现正在进行中,是基于Xilinx的ZU5目标开发板。 diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj index f3ab9fc66a072cd5b0bbba56ae99258f04be3612..d6114880efcaf528bd26fcda11e08ec68d943575 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj @@ -8,22 +8,29 @@ /* Begin PBXBuildFile section */ 30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; }; + C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */; }; FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; }; FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; }; FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; }; FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; }; FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; }; FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; }; - FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602C72108580600FACB58 /* MetalHelper.swift */; }; - FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC918190211DBC3500B6F354 /* paddle-mobile.png */; }; - FC918193211DC70500B6F354 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC918192211DC70500B6F354 /* iphone.JPG */; }; - FCD04E6320F3146B0007374F /* params in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6120F3146A0007374F /* params */; }; - FCD04E6420F3146B0007374F /* model in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6220F3146A0007374F /* model */; }; - FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */; }; - FCDFD41B211D91C7005AB38B /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD41A211D91C7005AB38B /* synset.txt */; }; + FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = 
{isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; }; + FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; }; + FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; }; FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; }; FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; - FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCEEE7D3210627A000444BEC /* banana.jpeg */; }; + FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; }; + FCFE9B692152858600DECA15 /* hand.jpg.zip in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B632152858600DECA15 /* hand.jpg.zip */; }; + FCFE9B6A2152858600DECA15 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B642152858600DECA15 /* synset.txt */; }; + FCFE9B6B2152858600DECA15 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B652152858600DECA15 /* banana.jpeg */; }; + FCFE9B6C2152858600DECA15 /* hand.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B662152858600DECA15 /* hand.jpg */; }; + FCFE9B6D2152858600DECA15 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B672152858600DECA15 /* iphone.JPG */; }; + FCFE9B6E2152858600DECA15 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B682152858600DECA15 /* paddle-mobile.png */; }; + FCFE9C512152859600DECA15 /* genet_params in Resources */ = {isa = 
PBXBuildFile; fileRef = FCFE9B752152859500DECA15 /* genet_params */; }; + FCFE9C522152859600DECA15 /* genet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9B762152859500DECA15 /* genet_model */; }; + FCFE9D232152859600DECA15 /* ar_model in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9C4C2152859500DECA15 /* ar_model */; }; + FCFE9D242152859600DECA15 /* ar_params in Resources */ = {isa = PBXBuildFile; fileRef = FCFE9C4D2152859500DECA15 /* ar_params */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -44,6 +51,8 @@ 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.release.xcconfig"; sourceTree = ""; }; 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_demo.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = ""; }; + C2E67E5C21524E460013F575 /* LoadPointerViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LoadPointerViewController.h; sourceTree = ""; }; + C2E67E5D21524E460013F575 /* LoadPointerViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = LoadPointerViewController.m; sourceTree = ""; }; FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = ""; }; 
FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; }; FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; @@ -52,15 +61,23 @@ FC039B8820E11C560081E9F8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; - FC3602C72108580600FACB58 /* MetalHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = MetalHelper.swift; path = "../../paddle-mobile-unit-test/paddle-mobile-unit-test/MetalHelper.swift"; sourceTree = ""; }; - FC918190211DBC3500B6F354 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = ""; }; - FC918192211DC70500B6F354 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = ""; }; - FCD04E6120F3146A0007374F /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = ""; }; - FCD04E6220F3146A0007374F /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = ""; }; - FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelHelper.swift; sourceTree = ""; }; - FCDFD41A211D91C7005AB38B /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = ""; }; + 
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = ""; }; + FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; }; + FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = ""; }; + FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; + FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = ""; }; FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - FCEEE7D3210627A000444BEC /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = ""; }; + FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = ""; }; + FCFE9B632152858600DECA15 /* hand.jpg.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = hand.jpg.zip; sourceTree = ""; }; + FCFE9B642152858600DECA15 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = ""; }; + FCFE9B652152858600DECA15 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = ""; }; + FCFE9B662152858600DECA15 /* hand.jpg */ = {isa = PBXFileReference; 
lastKnownFileType = image.jpeg; path = hand.jpg; sourceTree = ""; }; + FCFE9B672152858600DECA15 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = ""; }; + FCFE9B682152858600DECA15 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = ""; }; + FCFE9B752152859500DECA15 /* genet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_params; sourceTree = ""; }; + FCFE9B762152859500DECA15 /* genet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_model; sourceTree = ""; }; + FCFE9C4C2152859500DECA15 /* ar_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = ar_model; sourceTree = ""; }; + FCFE9C4D2152859500DECA15 /* ar_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = ar_params; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -88,6 +105,7 @@ 7B7DED984E9EE7BFB45E24E8 /* Frameworks */ = { isa = PBXGroup; children = ( + FC4FD97B2140EE250073E130 /* libc++.tbd */, 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */, ); name = Frameworks; @@ -115,49 +133,82 @@ FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = { isa = PBXGroup; children = ( - FC0E2C2020EDC03B009C1FAC /* models */, - FC0E2C1D20EDC030009C1FAC /* images */, + FCFE9B6F2152859500DECA15 /* models */, + FCFE9B622152858600DECA15 /* images */, + FC803BCA214D27920094B8E5 /* VideoCapture */, + FC8CFED2213519540094D569 /* Net */, FC039B8120E11C550081E9F8 /* AppDelegate.swift */, - FC013927210204A3008100E3 /* PreProcessKernel.metal */, FC039B8320E11C550081E9F8 /* ViewController.swift */, FC039B8520E11C550081E9F8 /* Main.storyboard */, FC039B8820E11C560081E9F8 /* Assets.xcassets */, FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */, FC039B8D20E11C560081E9F8 /* Info.plist */, - FC3602C72108580600FACB58 /* MetalHelper.swift */, - 
FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */, + FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */, + FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */, + C2E67E5C21524E460013F575 /* LoadPointerViewController.h */, + C2E67E5D21524E460013F575 /* LoadPointerViewController.m */, ); path = "paddle-mobile-demo"; sourceTree = ""; }; - FC0E2C1D20EDC030009C1FAC /* images */ = { + FC803BCA214D27920094B8E5 /* VideoCapture */ = { + isa = PBXGroup; + children = ( + FC803BCB214D27920094B8E5 /* FPSCounter.swift */, + FC803BCC214D27920094B8E5 /* VideoCapture.swift */, + ); + path = VideoCapture; + sourceTree = ""; + }; + FC8CFED2213519540094D569 /* Net */ = { + isa = PBXGroup; + children = ( + FC013927210204A3008100E3 /* PreProcessKernel.metal */, + FCBCCC542122EF5400D94F7E /* MetalHelper.swift */, + ); + path = Net; + sourceTree = ""; + }; + FCFE9B622152858600DECA15 /* images */ = { isa = PBXGroup; children = ( - FC918192211DC70500B6F354 /* iphone.JPG */, - FC918190211DBC3500B6F354 /* paddle-mobile.png */, - FCDFD41A211D91C7005AB38B /* synset.txt */, - FCEEE7D3210627A000444BEC /* banana.jpeg */, + FCFE9B632152858600DECA15 /* hand.jpg.zip */, + FCFE9B642152858600DECA15 /* synset.txt */, + FCFE9B652152858600DECA15 /* banana.jpeg */, + FCFE9B662152858600DECA15 /* hand.jpg */, + FCFE9B672152858600DECA15 /* iphone.JPG */, + FCFE9B682152858600DECA15 /* paddle-mobile.png */, ); name = images; path = ../../images; sourceTree = ""; }; - FC0E2C2020EDC03B009C1FAC /* models */ = { + FCFE9B6F2152859500DECA15 /* models */ = { isa = PBXGroup; children = ( - FCD04E6020F3146A0007374F /* mobilenet */, + FCFE9B742152859500DECA15 /* genet */, + FCFE9C4B2152859500DECA15 /* fluid_fssd_new_ar */, ); name = models; path = ../../models; sourceTree = ""; }; - FCD04E6020F3146A0007374F /* mobilenet */ = { + FCFE9B742152859500DECA15 /* genet */ = { + isa = PBXGroup; + children = ( + FCFE9B752152859500DECA15 /* genet_params */, + FCFE9B762152859500DECA15 /* genet_model */, 
+ ); + path = genet; + sourceTree = ""; + }; + FCFE9C4B2152859500DECA15 /* fluid_fssd_new_ar */ = { isa = PBXGroup; children = ( - FCD04E6120F3146A0007374F /* params */, - FCD04E6220F3146A0007374F /* model */, + FCFE9C4C2152859500DECA15 /* ar_model */, + FCFE9C4D2152859500DECA15 /* ar_params */, ); - path = mobilenet; + path = fluid_fssd_new_ar; sourceTree = ""; }; /* End PBXGroup section */ @@ -195,6 +246,7 @@ TargetAttributes = { FC039B7D20E11C550081E9F8 = { CreatedOnToolsVersion = 9.3.1; + LastSwiftMigration = 0940; }; }; }; @@ -221,14 +273,18 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( - FCD04E6320F3146B0007374F /* params in Resources */, + FCFE9D232152859600DECA15 /* ar_model in Resources */, FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */, - FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */, + FCFE9C522152859600DECA15 /* genet_model in Resources */, + FCFE9D242152859600DECA15 /* ar_params in Resources */, + FCFE9B6E2152858600DECA15 /* paddle-mobile.png in Resources */, + FCFE9C512152859600DECA15 /* genet_params in Resources */, + FCFE9B692152858600DECA15 /* hand.jpg.zip in Resources */, FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */, - FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */, - FC918193211DC70500B6F354 /* iphone.JPG in Resources */, - FCDFD41B211D91C7005AB38B /* synset.txt in Resources */, - FCD04E6420F3146B0007374F /* model in Resources */, + FCFE9B6A2152858600DECA15 /* synset.txt in Resources */, + FCFE9B6B2152858600DECA15 /* banana.jpeg in Resources */, + FCFE9B6D2152858600DECA15 /* iphone.JPG in Resources */, + FCFE9B6C2152858600DECA15 /* hand.jpg in Resources */, FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -280,10 +336,13 @@ buildActionMask = 2147483647; files = ( FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */, - FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */, + 
FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */, FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */, + FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */, + FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */, + FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */, + C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */, FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */, - FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -428,19 +487,23 @@ baseConfigurationReference = 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = A798K58VVL; + ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", ); - PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal; + PRODUCT_BUNDLE_IDENTIFIER = "com.baidu.paddle-mobile"; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE = ""; PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_OBJC_BRIDGING_HEADER = "paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h"; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -451,19 +514,22 @@ baseConfigurationReference = 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = "iPhone Developer"; CODE_SIGN_STYLE = Automatic; DEVELOPMENT_TEAM = A798K58VVL; + ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", ); - 
PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal; + PRODUCT_BUNDLE_IDENTIFIER = "com.baidu.paddle-mobile"; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE = ""; PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_OBJC_BRIDGING_HEADER = "paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h"; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme deleted file mode 100644 index 46c65bd36a9ab7027b1cb7a81533dcd553ccb62e..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme +++ /dev/null @@ -1,91 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist deleted file mode 100644 index 8f61f4a88a7bcbe39bbb56e22ef203803776fdec..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist +++ /dev/null @@ -1,22 +0,0 @@ - - - - - SchemeUserState - - paddle-mobile-demo.xcscheme - - orderHint - 2 - - - SuppressBuildableAutocreation - - FC039B7D20E11C550081E9F8 - - primary - - - - - diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift index 54dad2b5bf721f3d132bad2502d30b34ca0773ab..537fb06ed9e5b9100bea43b7acae9c014e0f4a78 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift +++ 
b/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift @@ -19,7 +19,6 @@ class AppDelegate: UIResponder, UIApplicationDelegate { var window: UIWindow? - func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool { // Override point for customization after application launch. return true diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard index a5efadeb97ccc41449dc32a2c1dfcdfcf9fceac5..d72694fdacf5b46821ba6422fa77e095f92382b9 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard @@ -11,6 +11,34 @@ + + + + + + + + + + + + + + + + + + + + + + @@ -20,12 +48,11 @@ - + - + @@ -159,11 +190,12 @@ - + + @@ -175,10 +207,12 @@ + + @@ -195,11 +229,81 @@ + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.h b/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.h new file mode 100644 index 0000000000000000000000000000000000000000..a876c236219817bf146cfa4a77eb9421f8472971 --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.h @@ -0,0 +1,13 @@ +// +// LoadPointerViewController.h +// paddle-mobile-demo +// +// Created by Xiao,Haichun on 2018/9/19. +// Copyright © 2018年 orange. All rights reserved. 
+//
+
+#import <UIKit/UIKit.h>
+
+@interface LoadPointerViewController : UIViewController
+
+@end
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.m b/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.m
new file mode 100644
index 0000000000000000000000000000000000000000..857745686fbe750de08e8be357ccf5a4159eaae8
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/LoadPointerViewController.m
@@ -0,0 +1,210 @@
+//
+//  LoadPointerViewController.m
+//  paddle-mobile-demo
+//
+//  Created by Xiao,Haichun on 2018/9/19.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+#import "LoadPointerViewController.h"
+#import <Metal/Metal.h>
+#import "paddle-mobile-demo-Bridging-Header.h"
+
+@interface LoadPointerViewController ()
+
+@property (strong, nonatomic) id<MTLDevice> device;
+@property (strong, nonatomic) id<MTLTexture> texture;
+@property (strong, nonatomic) id<MTLCommandQueue> queue;
+@property (strong, nonatomic) PaddleMobileGPU *runner;
+@property (strong, nonatomic) ModelConfig *modelConfig;
+
+@end
+
+/// Reads the entire file at `path` into a freshly malloc'ed buffer.
+///
+/// @param path    File to read; nil is treated as "file not found".
+/// @param outSize Receives the byte count on success; untouched on failure.
+/// @return The buffer (owned by the caller), or NULL when the file is missing,
+///         empty, unreadable, or the allocation fails.
+static void *LoadFileContents(NSString *path, long *outSize) {
+    if (path == nil) {
+        return NULL;
+    }
+    FILE *fp = fopen([path UTF8String], "rb");
+    if (fp == NULL) {
+        return NULL;
+    }
+
+    void *buffer = NULL;
+    long size = -1;
+    if (fseek(fp, 0, SEEK_END) == 0) {
+        size = ftell(fp);
+        rewind(fp);
+    }
+    if (size > 0) {
+        buffer = malloc((size_t)size);
+        if (buffer != NULL && fread(buffer, 1, (size_t)size, fp) != (size_t)size) {
+            // Short read: never hand a half-filled model to the runtime.
+            free(buffer);
+            buffer = NULL;
+        }
+    }
+    fclose(fp);
+
+    if (buffer != NULL) {
+        *outSize = size;
+    }
+    return buffer;
+}
+
+@implementation LoadPointerViewController
+
+/// Creates the Metal device and command queue, reads the bundled "ar_model" and
+/// "ar_params" files into memory and describes them in `modelConfig`.
+- (void)viewDidLoad {
+    [super viewDidLoad];
+
+    self.device = MTLCreateSystemDefaultDevice();
+    self.queue = [self.device newCommandQueue];
+
+//    NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"genet_model" withExtension:nil].path;
+//    NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"genet_params" withExtension:nil].path;
+
+    NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"ar_model" withExtension:nil].path;
+    NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"ar_params" withExtension:nil].path;
+
+    long modelSize = 0;
+    long paramSize = 0;
+    void *modelBuffer = LoadFileContents(modelPath, &modelSize);
+    void *paramBuffer = LoadFileContents(paramPath, &paramSize);
+    if (modelBuffer == NULL || paramBuffer == NULL) {
+        NSLog(@"Failed to read model (%@) or params (%@)", modelPath, paramPath);
+        free(modelBuffer);  // free(NULL) is a no-op
+        free(paramBuffer);
+        return;
+    }
+
+    _modelConfig = [[ModelConfig alloc] init];
+//    _modelConfig.means = @[[NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0]];
+//    _modelConfig.scale = 0.017;
+//    _modelConfig.dims = @[[NSNumber numberWithFloat:1], [NSNumber numberWithFloat:128.], [NSNumber numberWithFloat:128.0],[NSNumber numberWithFloat:3.0]];
+    _modelConfig.means = @[@103.94f, @116.78f, @123.68f];
+    _modelConfig.scale = 1;
+    _modelConfig.dims = @[@1.0f, @160.0f, @160.0f, @3.0f];
+    // NOTE(review): the buffers are handed to ModelConfig as raw pointers and are
+    // never freed in this file -- confirm who owns them after load.
+    _modelConfig.modelPointer = modelBuffer;
+    _modelConfig.modelSize = (int)modelSize;
+    _modelConfig.paramPointer = paramBuffer;
+    _modelConfig.paramSize = (int)paramSize;
+}
+
+/// Builds the MobileNet-SSD runner from `modelConfig` and loads it.
+- (IBAction)loaderButtonPressed:(id)sender {
+    if (_modelConfig == nil) {
+        NSLog(@"Model files were not loaded; cannot create the runner");
+        return;
+    }
+//    _runner = [[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:GenetType modelConfig:_modelConfig];
+    _runner = [[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:MobileNetSSDType modelConfig:_modelConfig];
+
+    [_runner load];
+}
+
+- (IBAction)predictButtonPressed:(id)sender {
+    [self predict];
+}
+
+/// Draws `image`, stretched to this controller's view size, into an RGBA8 bitmap
+/// and uploads the pixels into a new MTLPixelFormatRGBA8Unorm texture.
+///
+/// @return The texture, or nil when the image or device is missing, the view has
+///         no area, or the bitmap context cannot be created.
+- (id<MTLTexture>)createTextureFromImage:(UIImage *)image device:(id<MTLDevice>)device
+{
+    if (image.CGImage == NULL || device == nil) {
+        NSLog(@"Failed to load image, probably an unsupported texture type");
+        return nil;
+    }
+
+    // Re-tagging the orientation only changes UIImage metadata (and the size
+    // logged below); the CGImage drawn further down is the same pixel data.
+    image = [UIImage imageWithCGImage:[image CGImage]
+                                scale:[image scale]
+                          orientation:UIImageOrientationLeft];
+
+    NSLog(@"orientation and size and stuff %ld %f %f", (long)image.imageOrientation, image.size.width, image.size.height);
+
+    const size_t width = (size_t)self.view.frame.size.width;
+    const size_t height = (size_t)self.view.frame.size.height;
+    if (width == 0 || height == 0) {
+        return nil;
+    }
+
+    // The texture below is RGBA8 and is filled with 4-byte pixels, so the bitmap
+    // uses that exact layout instead of whatever layout the source image has.
+    const size_t bytesPerRow = 4 * width;
+    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
+    CGContextRef context = CGBitmapContextCreate(NULL, width, height, 8, bytesPerRow, colorSpace,
+                                                 (CGBitmapInfo)kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big);
+    CGColorSpaceRelease(colorSpace);
+
+    if (!context)
+    {
+        NSLog(@"Failed to load image, probably an unsupported texture type");
+        return nil;
+    }
+
+    CGContextDrawImage(context, CGRectMake(0, 0, width, height), image.CGImage);
+
+    MTLTextureDescriptor *texDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
+                                                                                         width:width
+                                                                                        height:height
+                                                                                     mipmapped:NO];
+    id<MTLTexture> texture = [device newTextureWithDescriptor:texDesc];
+
+    [texture replaceRegion:MTLRegionMake2D(0, 0, width, height)
+               mipmapLevel:0
+                 withBytes:CGBitmapContextGetData(context)
+               bytesPerRow:bytesPerRow];
+
+    // CGBitmapContextCreate returns an owned reference that ARC does not manage.
+    CGContextRelease(context);
+    return texture;
+}
+
+/// Runs 428 back-to-back predictions on "hand.jpg" and logs the elapsed time
+/// divided by the iteration count once the final prediction succeeds.
+- (void)predict {
+    if (_runner == nil) {
+        NSLog(@"predict called before the model was loaded");
+        return;
+    }
+    _texture = [self createTextureFromImage:[UIImage imageNamed:@"hand.jpg"] device:self.device];
+    if (_texture == nil) {
+        NSLog(@"Could not create the input texture; skipping prediction");
+        return;
+    }
+
+    const NSInteger iterations = 428;
+    const NSTimeInterval startTime = [[NSDate date] timeIntervalSince1970];
+    for (NSInteger i = 0; i < iterations; i++) {
+        [_runner predict:_texture withCompletion:^(BOOL success, NSArray *result) {
+            if (success && i == iterations - 1) {
+                const double elapsed = [[NSDate date] timeIntervalSince1970] - startTime;
+                NSLog(@"gap ==== %fms", (elapsed / iterations) * 1000);
+            }
+//            for (int i = 0; i < result.count; i ++) {
+//                NSNumber *number = result[i];
+//                NSLog(@"result %d = %f:",i, [number floatValue]);
+//            }
+        }];
+    }
+}
+
+- (void)didReceiveMemoryWarning {
+    [super didReceiveMemoryWarning];
+    // Dispose of any resources that can be recreated.
+}
+
+@end
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift deleted file mode 100644 index 74fa89d93e042f90fe1b590a596ec584fff67f6d..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift +++ /dev/null @@ -1,48 +0,0 @@ -// -// MetalHelper.swift -// paddle-mobile-demo -// -// Created by liuRuiLong on 2018/7/25. -// Copyright © 2018年 orange. All rights reserved. -// - -import Metal -import MetalKit -import Foundation -import paddle_mobile -import MetalPerformanceShaders - -class MetalHelper { - let device: MTLDevice - let queue: MTLCommandQueue - let textureLoader: MTKTextureLoader - static let shared: MetalHelper = MetalHelper.init() - private init(){ - device = MTLCreateSystemDefaultDevice()! - queue = device.makeCommandQueue()! 
- textureLoader = MTKTextureLoader.init(device: device) - } - - static func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) { - let tmpTextureDes = MTLTextureDescriptor.init() - tmpTextureDes.width = size.width - tmpTextureDes.height = size.height - tmpTextureDes.depth = 1 - tmpTextureDes.usage = [.shaderRead, .shaderWrite] - tmpTextureDes.pixelFormat = .rgba32Float - tmpTextureDes.textureType = .type2D - tmpTextureDes.storageMode = .shared - tmpTextureDes.cpuCacheMode = .defaultCache - let dest = MetalHelper.shared.device.makeTexture(descriptor: tmpTextureDes) - - let scale = MPSImageLanczosScale.init(device: MetalHelper.shared.device) - - let buffer = queue.makeCommandBuffer() - scale.encode(commandBuffer: buffer!, sourceTexture: input, destinationTexture: dest!) - buffer?.addCompletedHandler({ (buffer) in - complete(dest!) - }) - buffer?.commit() - } -} - diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift deleted file mode 100644 index 7e1f66855e45453eee9fdbe034a309aee44ff960..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift +++ /dev/null @@ -1,89 +0,0 @@ -// -// ModelHelper.swift -// paddle-mobile-demo -// -// Created by liuRuiLong on 2018/8/10. -// Copyright © 2018年 orange. All rights reserved. 
-// - -import UIKit -import MetalKit -import Foundation -import paddle_mobile -import MetalPerformanceShaders - -class PreProccess: CusomKernel { - init(device: MTLDevice) { - let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) - super.init(device: device, inFunctionName: "preprocess", outputDim: s, usePaddleMobileLib: false) - } -} - -let modelHelperMap: [SupportModel : ModelHelper] = [.mobilenet : MobileNetHelper.init()] - -enum SupportModel: String{ - case mobilenet = "mobilenet" - static func supportedModels() -> [SupportModel] { - return [.mobilenet] - } -} - -protocol ModelHelper { - var dim: [Int] { get } - var modelPath: String { get } - var paramPath: String { get } - var modelDir: String { get } - var preprocessKernel: CusomKernel { get } - func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) - func resultStr(res: [Float]) -> String -} - -extension ModelHelper { - func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { - let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error" - MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in - getTexture(resTexture) - } - } -} - -struct MobileNetHelper: ModelHelper{ - class PreWords { - var contents: [String] = [] - init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) { - if let filePath = inBundle.path(forResource: fileName, ofType: type) { - let string = try! 
String.init(contentsOfFile: filePath) - contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{ - String($0[$0.index($0.startIndex, offsetBy: 10)...]) - } - }else{ - fatalError("no file call \(fileName)") - } - } - subscript(index: Int) -> String{ - return contents[index] - } - } - let labels = PreWords.init(fileName: "synset") - - func resultStr(res: [Float]) -> String { - var s: [String] = [] - res.top(r: 5).enumerated().forEach{ - s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) - } - return s.joined(separator: "\n") - } - - var preprocessKernel: CusomKernel - let dim = [1, 224, 224, 3] - let modelPath: String - let paramPath: String - let modelDir: String - - init() { - modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" - paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null" - modelDir = "" - preprocessKernel = PreProccess.init(device: MetalHelper.shared.device) - } -} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/MultiPredictViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/MultiPredictViewController.swift new file mode 100644 index 0000000000000000000000000000000000000000..bd07da61d0215b243372c27addf60efc3b2ad7d6 --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/MultiPredictViewController.swift @@ -0,0 +1,66 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +import UIKit +import paddle_mobile + +class MultiPredictViewController: UIViewController { + var runner1: Runner! + var runner2: Runner! + override func viewDidLoad() { + super.viewDidLoad() + let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device) + let genet = Genet.init(device: MetalHelper.shared.device) + runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) + let queue2 = MetalHelper.shared.device.makeCommandQueue() + + runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) + } + + @IBAction func predictAct(_ sender: Any) { + let success = self.runner2.load() +// DispatchQueue.global().async { + let image1 = UIImage.init(named: "hand.jpg") +// let success = self.runner2.load() +// if success { +// for i in 0..<10000 { +// print(i) +// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in +// print("result1: ") +//// print(res) +// }) +// } +// } else { +// print("load failed") +// } +// self.runner1.clear() +// } +// return +// DispatchQueue.global().async { +//// sleep(1) +// let image1 = UIImage.init(named: "banana.jpeg") +//// if success { +// for _ in 0..<10 { +// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in +// print("result2: ") +// print(res) +// }) +// } +//// } else { +//// print("load failed") +//// } +//// self.runner2.clear() +// } + } +} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MetalHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MetalHelper.swift new file mode 100644 index 0000000000000000000000000000000000000000..d314e8b3f8845ef95b36b4b25e61809d353f0f24 --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MetalHelper.swift @@ -0,0 +1,33 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Metal +import MetalKit +import Foundation +import paddle_mobile + +class MetalHelper { + let device: MTLDevice + let queue: MTLCommandQueue + let textureLoader: MTKTextureLoader + static let shared: MetalHelper = MetalHelper.init() + private init(){ + device = MTLCreateSystemDefaultDevice()! + queue = device.makeCommandQueue()! + textureLoader = MTKTextureLoader.init(device: device) + } + + +} + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift new file mode 100644 index 0000000000000000000000000000000000000000..a954328acae3a80643ad849d58cd6ac86bf7865e --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift @@ -0,0 +1,9 @@ +// +// PaddleMobile.swift +// paddle-mobile-demo +// +// Created by liuRuiLong on 2018/9/5. +// Copyright © 2018年 orange. All rights reserved. +// + +import Foundation diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..ac07e449bc5919a37a57143aa6881f79507a45b4 --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal @@ -0,0 +1,137 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + + +kernel void mobilenet_preprocess( + texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]], + uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); + const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void mobilenet_preprocess_half( + texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]], + uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f); + const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void mobilenet_ssd_preprocess( + texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]], + uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); + const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void 
mobilenet_ssd_preprocess_half( + texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]], + uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f); + const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void genet_preprocess(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f); + const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void genet_preprocess_half(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f); + const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void mobilent_ar_preprocess(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f); + const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void mobilent_ar_preprocess_half(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) +{ + if (gid.x >= outTexture.get_width() || + 
gid.y >= outTexture.get_height()) { + return; + } + const auto means = half4(128.0f, 128.0f, 128.0f, 0.0f); + const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; + outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); +} + +kernel void scale(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) return; + float w_stride = inTexture.get_width() / outTexture.get_width(); + float h_stride = inTexture.get_height() / outTexture.get_height(); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0); + outTexture.write(input, gid); +} + +kernel void scale_half(texture2d inTexture [[texture(0)]], texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) return; + float w_stride = inTexture.get_width() / outTexture.get_width(); + float h_stride = inTexture.get_height() / outTexture.get_height(); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0); + outTexture.write(half4(input), gid); +} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal b/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal deleted file mode 100644 index f359ab39ac5fbc18febfb6f0da367e72b61b959c..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal +++ /dev/null @@ -1,44 +0,0 @@ -// -// PreProcessKernel.metal -// paddle-mobile-demo -// -// Created by liuRuiLong on 2018/7/20. -// Copyright © 2018年 orange. All rights reserved. 
-// - -#include -using namespace metal; - - -kernel void preprocess( - texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]], - uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); - const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - -kernel void preprocess_half( - texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]], - uint2 gid [[thread_position_in_grid]]) -{ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height()) { - return; - } - const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f); - const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; - outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); -} - - - - - diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift new file mode 100644 index 0000000000000000000000000000000000000000..f9e841f9c2a3060e775726023b6d5cfc3eeb679d --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift @@ -0,0 +1,31 @@ + + +import Foundation +import QuartzCore + +public class FPSCounter { + private(set) public var fps: Double = 0 + + var frames = 0 + var startTime: CFTimeInterval = 0 + + public func start() { + frames = 0 + startTime = CACurrentMediaTime() + } + + public func frameCompleted() { + frames += 1 + let now = CACurrentMediaTime() + let elapsed = now - startTime + if elapsed > 0.1 { + let current = Double(frames) / elapsed + let smoothing = 0.75 + fps = smoothing*fps + (1 - smoothing)*current + if elapsed > 1 { + frames = 0 + startTime = CACurrentMediaTime() + } + } + } +} diff --git 
a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift new file mode 100644 index 0000000000000000000000000000000000000000..c235ed2f0391bdc97e9e182c0e9897814a0518fa --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift @@ -0,0 +1,218 @@ + +import UIKit +import Metal +import CoreVideo +import AVFoundation + +@available(iOS 10.0, *) +@objc public protocol VideoCaptureDelegate: NSObjectProtocol { + @objc optional func videoCapture(_ capture: VideoCapture, didCaptureSampleBuffer sampleBuffer: CMSampleBuffer, timestamp: CMTime) + @objc optional func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) + @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhoto previewImage: UIImage?) + @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhotoTexture texture: MTLTexture?) +} + +/** + Simple interface to the iPhone's camera. +*/ +@available(iOS 10.0, *) +public class VideoCapture: NSObject { + public var previewLayer: AVCaptureVideoPreviewLayer? + public weak var delegate: VideoCaptureDelegate? + public var fps = -1 + private let device: MTLDevice? + private let videoOrientation: AVCaptureVideoOrientation + private var textureCache: CVMetalTextureCache? + private let captureSession = AVCaptureSession() + private let videoOutput = AVCaptureVideoDataOutput() + private let photoOutput = AVCapturePhotoOutput() + private let queue = DispatchQueue(label: "net.machinethink.camera-queue") + private var lastTimestamp = CMTime() + private let cameraPosition: AVCaptureDevice.Position + public init(device: MTLDevice? 
= nil, orientation: AVCaptureVideoOrientation = .portrait, position: AVCaptureDevice.Position = .back) { + self.device = device + self.videoOrientation = orientation + self.cameraPosition = position + super.init() + } + + public func setUp(sessionPreset: AVCaptureSession.Preset = .medium, + completion: @escaping (Bool) -> Void) { + queue.async { + let success = self.setUpCamera(sessionPreset: sessionPreset) + DispatchQueue.main.async { + completion(success) + } + } + } + + func fontCamera() -> AVCaptureDevice? { + let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices + return deveices.first + + } + + func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { + if let inDevice = device{ + guard CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, inDevice, nil, &textureCache) == kCVReturnSuccess else { + print("Error: could not create a texture cache") + return false + } + } + + captureSession.beginConfiguration() + captureSession.sessionPreset = sessionPreset + + var oCaptureDevice: AVCaptureDevice? + switch cameraPosition { + case .back: + oCaptureDevice = AVCaptureDevice.default(for: AVMediaType.video) + break + case .front: + oCaptureDevice = fontCamera() + break + default: + break + } + + guard let captureDevice = oCaptureDevice else { + print("Error: no video devices available") + return false + } + + guard let videoInput = try? 
AVCaptureDeviceInput(device: captureDevice) else { + print("Error: could not create AVCaptureDeviceInput") + return false + } + + if captureSession.canAddInput(videoInput) { + captureSession.addInput(videoInput) + } + + let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) + previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect + previewLayer.connection?.videoOrientation = self.videoOrientation + self.previewLayer = previewLayer + + let settings: [String : Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) + ] + + videoOutput.videoSettings = settings + videoOutput.alwaysDiscardsLateVideoFrames = true + videoOutput.setSampleBufferDelegate(self, queue: queue) + if captureSession.canAddOutput(videoOutput) { + captureSession.addOutput(videoOutput) + } + + // We want the buffers to be in portrait orientation otherwise they are + // rotated by 90 degrees. Need to set this _after_ addOutput()! + videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation + + if captureSession.canAddOutput(photoOutput) { + captureSession.addOutput(photoOutput) + } + + captureSession.commitConfiguration() + return true + } + + public func start() { + if !captureSession.isRunning { + captureSession.startRunning() + } + } + + public func stop() { + if captureSession.isRunning { + captureSession.stopRunning() + } + } + + /* Captures a single frame of the camera input. */ + public func capturePhoto() { + let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)]) + settings.previewPhotoFormat = [ + kCVPixelBufferPixelFormatTypeKey as String: settings.__availablePreviewPhotoPixelFormatTypes[0], + kCVPixelBufferWidthKey as String: 480, + kCVPixelBufferHeightKey as String: 360, + ] + photoOutput.capturePhoto(with: settings, delegate: self) + } + + func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? 
{ + if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { + let width = CVPixelBufferGetWidth(imageBuffer) + let height = CVPixelBufferGetHeight(imageBuffer) + var texture: CVMetalTexture? + CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, textureCache, imageBuffer, nil, .bgra8Unorm, width, height, 0, &texture) + if let texture = texture { + return CVMetalTextureGetTexture(texture) + } + } + return nil + } + + func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? { + if let sampleBuffer = sampleBuffer, + let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { + let width = CVPixelBufferGetWidth(imageBuffer) + let height = CVPixelBufferGetHeight(imageBuffer) + let rect = CGRect(x: 0, y: 0, width: CGFloat(width), height: CGFloat(height)) + let ciImage = CIImage(cvPixelBuffer: imageBuffer) + let ciContext = CIContext(options: nil) + if let cgImage = ciContext.createCGImage(ciImage, from: rect) { + return UIImage(cgImage: cgImage) + } + } + return nil + } +} + + +@available(iOS 10.0, *) +extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { + public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { + // Because lowering the capture device's FPS looks ugly in the preview, + // we capture at full speed but only call the delegate at its desired + // framerate. If `fps` is -1, we run at the full framerate. + let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) + let deltaTime = timestamp - lastTimestamp + if fps == -1 || deltaTime >= CMTimeMake(1, Int32(fps)) { + lastTimestamp = timestamp + self.delegate?.videoCapture?(self, didCaptureSampleBuffer: sampleBuffer, timestamp: timestamp) + if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCaptureVideoTexture:timestamp:))) ?? 
false{ + let texture = convertToMTLTexture(sampleBuffer: sampleBuffer) + delegate?.videoCapture?(self, didCaptureVideoTexture: texture, timestamp: timestamp) + } + } + } + + public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { + print("dropped frame") + } +} + +@available(iOS 10.0, *) +extension VideoCapture: AVCapturePhotoCaptureDelegate { + public func photoOutput(_ captureOutput: AVCapturePhotoOutput, + didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?, + previewPhoto previewPhotoSampleBuffer: CMSampleBuffer?, + resolvedSettings: AVCaptureResolvedPhotoSettings, + bracketSettings: AVCaptureBracketedStillImageSettings?, + error: Error?) { + var imageTexture: MTLTexture? + var previewImage: UIImage? + if error == nil { + if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhotoTexture:))) ?? false{ + imageTexture = convertToMTLTexture(sampleBuffer: photoSampleBuffer) + self.delegate?.videoCapture?(self, didCapturePhotoTexture: imageTexture) + } + + if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhoto:))) ?? 
false{ + previewImage = convertToUIImage(sampleBuffer: previewPhotoSampleBuffer) + self.delegate?.videoCapture?(self, didCapturePhoto: previewImage) + } + } + } +} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift index 30fdaf078556bdc4546aec4f27e153f469d9e5ac..1c6d0a91c9bf1d202091282e43859270a238edaa 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift @@ -14,164 +14,292 @@ import UIKit import MetalKit +import CoreMedia import paddle_mobile import MetalPerformanceShaders -let threadSupport = [1] +var platform: Platform = .GPU +let threadSupport: [(Platform, String)] = [(.GPU, "GPU"), (.CPU, "CPU")] + +//.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), +let modelHelperMap: [SupportModel : Runner] = [ + .genet : Runner.init(inNet: Genet.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), + .mobilenet_ssd_ar : Runner.init(inNet: MobileNet_ssd_AR.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform)] +//, .genet : Genet.init() +//let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()] + +let netSupport: [SupportModel : Net] = [.genet : Genet.init(device: MetalHelper.shared.device), .mobilenet_ssd_ar : MobileNet_ssd_AR.init(device: MetalHelper.shared.device)] + +enum SupportModel: String{ + // case mobilenet = "mobilenet" +// case mobilenet_ssd = "mobilenetssd" + case genet = "genet" + case mobilenet_ssd_ar = "mobilenetssd_ar" + + static func supportedModels() -> [SupportModel] { + // .mobilenet, + // .mobilenet_ssd, + return [.genet, .mobilenet_ssd_ar] + } +} class ViewController: UIViewController { - @IBOutlet 
weak var resultTextView: UITextView! - @IBOutlet weak var selectImageView: UIImageView! - @IBOutlet weak var elapsedTimeLabel: UILabel! - @IBOutlet weak var modelPickerView: UIPickerView! - @IBOutlet weak var threadPickerView: UIPickerView! - var selectImage: UIImage? - var program: Program? - var executor: Executor? - var modelType: SupportModel = .mobilenet - var toPredictTexture: MTLTexture? - var modelHelper: ModelHelper { - return modelHelperMap[modelType] ?! " has no this type " - } - var threadNum = 1 + @IBOutlet weak var resultTextView: UITextView! + @IBOutlet weak var selectImageView: UIImageView! + @IBOutlet weak var elapsedTimeLabel: UILabel! + @IBOutlet weak var modelPickerView: UIPickerView! + @IBOutlet weak var threadPickerView: UIPickerView! + @IBOutlet weak var videoView: UIView! +// var videoCapture: VideoCapture! + + var selectImage: UIImage? + var inputPointer: UnsafeMutablePointer? + var modelType: SupportModel = SupportModel.supportedModels()[0] + var toPredictTexture: MTLTexture? + + var runner: Runner! + + var threadNum = 1 + + @IBAction func loadAct(_ sender: Any) { + runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue, inPlatform: platform) - @IBAction func loadAct(_ sender: Any) { - let inModelHelper = modelHelper - let queue = MetalHelper.shared.queue - let loader = Loader.init() - do { - let modelPath = inModelHelper.modelPath - let paraPath = inModelHelper.paramPath - - program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath) - executor = try Executor.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!) - } catch let error { - print(error) + if platform == .CPU { + if inputPointer == nil { + inputPointer = runner.preproccess(image: selectImage!.cgImage!) + + } + } else if platform == .GPU { + if self.toPredictTexture == nil { + runner.getTexture(image: selectImage!.cgImage!) 
{[weak self] (texture) in + self?.toPredictTexture = texture } + } + } else { + fatalError( " unsupport " ) } - @IBAction func selectImageAct(_ sender: Any) { - let imagePicker = UIImagePickerController() - imagePicker.sourceType = .camera - imagePicker.delegate = self - self.present(imagePicker, animated: true, completion: nil) - } - - @IBAction func clearAct(_ sender: Any) { - executor?.clear() - program = nil - executor = nil - + if runner.load() { + print(" load success ! ") + } else { + print(" load error ! ") } - - @IBAction func predictAct(_ sender: Any) { - guard let inTexture = toPredictTexture else { - resultTextView.text = "请选择图片 ! " - return + } + + @IBAction func selectImageAct(_ sender: Any) { + let imagePicker = UIImagePickerController() + imagePicker.sourceType = .camera + imagePicker.delegate = self + self.present(imagePicker, animated: true, completion: nil) + } + + @IBAction func clearAct(_ sender: Any) { + runner.clear() + } + + @IBAction func predictAct(_ sender: Any) { + let max = 50 + switch platform { + case .GPU: + guard let inTexture = toPredictTexture else { + resultTextView.text = "请选择图片 ! " + return + } + + for _ in 0..<10{ + runner.predict(texture: inTexture) { (success, resultHolder) in + resultHolder?.releasePointer() + } + } + + let startDate = Date.init() + for i in 0.. 
Int { - if pickerView == modelPickerView { - return 1 - } else if pickerView == threadPickerView { - return 1 - } else { - fatalError() - } + func numberOfComponents(in pickerView: UIPickerView) -> Int { + if pickerView == modelPickerView { + return 1 + } else if pickerView == threadPickerView { + return 1 + } else { + fatalError() } - - func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int { - if pickerView == modelPickerView { - return SupportModel.supportedModels().count - } else if pickerView == threadPickerView { - return threadSupport.count - } else { - fatalError() - } + } + + func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int { + if pickerView == modelPickerView { + return SupportModel.supportedModels().count + } else if pickerView == threadPickerView { + return threadSupport.count + } else { + fatalError() } - - public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? { - if pickerView == modelPickerView { - return SupportModel.supportedModels()[row].rawValue - } else if pickerView == threadPickerView { - return "\(threadSupport[row])" - } else { - fatalError() - } + } + + public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? 
{ + if pickerView == modelPickerView { + return SupportModel.supportedModels()[row].rawValue + } else if pickerView == threadPickerView { + return threadSupport[row].1 + } else { + fatalError() } - - public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) { - if pickerView == modelPickerView { - self.modelType = SupportModel.supportedModels()[row] - } else if pickerView == threadPickerView { - self.threadNum = threadSupport[row] - } else { - fatalError() - } + } + + public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) { + if pickerView == modelPickerView { + self.modelType = SupportModel.supportedModels()[row] + } else if pickerView == threadPickerView { + + platform = threadSupport[row].0 + } else { + fatalError() } + } } extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate { - func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { - picker.dismiss(animated: true){[weak self] in - guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else{ - fatalError("no image") - } - sSelf.selectImage = image - sSelf.selectImageView.image = image - sSelf.modelHelper.getTexture(image: image.cgImage!, getTexture: { (texture) in - sSelf.toPredictTexture = texture - }) - } + func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { + picker.dismiss(animated: true){[weak self] in + guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? 
UIImage else{ + fatalError("no image") + } + sSelf.selectImage = image + sSelf.selectImageView.image = image + sSelf.runner.getTexture(image: image.cgImage!, getTexture: { (texture) in + sSelf.toPredictTexture = texture + }) } + } } +var bool1 = false +extension ViewController: VideoCaptureDelegate{ + func predictTexture(texture: MTLTexture){ + runner.scaleTexture(input: texture) { (scaledTexture) in + self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in +// print(resultHolder!.result![0]) + resultHolder?.releasePointer() + }) + } + } + + +// @available(iOS 10.0, *) +// func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) { +//// if !bool1 { +//// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) { +// self.predictTexture(texture: texture!) +//// } +// +// +//// bool1 = true +//// } +// +// } + +} + + + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h b/metal/paddle-mobile-demo/paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h new file mode 100644 index 0000000000000000000000000000000000000000..92de82860ccd372ba0eae962edd1b271986f1862 --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/paddle-mobile-demo-Bridging-Header.h @@ -0,0 +1,5 @@ +// +// Use this file to import your target's public headers that you would like to expose to Swift. 
+// + +#import diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist deleted file mode 100644 index 994fb8e4886aba91298c168a1b06888d8825b655..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist +++ /dev/null @@ -1,14 +0,0 @@ - - - - - SchemeUserState - - paddle-mobile-unit-test.xcscheme - - orderHint - 6 - - - - diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift index 6ab6f7c05e30049e850170409efcd6f049c73abe..7817befaedf1aff04b75abd39cc6f7f06bc935d3 100644 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift +++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift @@ -1,10 +1,16 @@ -// -// AppDelegate.swift -// paddle-mobile-unit-test -// -// Created by liuRuiLong on 2018/8/10. -// Copyright © 2018年 orange. All rights reserved. -// +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ import UIKit diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift index d57b610e4d10f02d2eace4892a6d55eda8f2c9b9..98f03affa2a230b2698edf6bafe5e06def8986b6 100644 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift +++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift @@ -1,18 +1,34 @@ -// -// ViewController.swift -// paddle-mobile-unit-test -// -// Created by liuRuiLong on 2018/8/10. -// Copyright © 2018年 orange. All rights reserved. -// +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ import UIKit +import Metal +//import MetalKit import paddle_mobile class ViewController: UIViewController { - override func viewDidLoad() { super.viewDidLoad() + let device = Metal.MTLCreateSystemDefaultDevice()! + let queue = device.makeCommandQueue()! 
+ let test = PaddleMobileUnitTest.init( + inDevice: device, + inQueue: queue + ) + test.testConcat() +// test.testReshape() +// test.testTranspose() print(" done ") } diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj index 6bceab43210c42ef83a2152463caf3bc8917b8c8..34d45528542d0d6a9d5ac153a7d6f818d962cbfd 100644 --- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj @@ -7,7 +7,31 @@ objects = { /* Begin PBXBuildFile section */ + 4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; }; + 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; }; + 4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; }; + 4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8B2146640900D0F791 /* SplitOp.swift */; }; + 4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */; }; + 4AA1EA90214664CD00D0F791 /* Split.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA8F214664CD00D0F791 /* Split.metal */; }; + 4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA91214665D700D0F791 /* ShapeOp.swift */; }; + 4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA932146661500D0F791 /* ShapeKernel.swift */; }; + 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA972146666500D0F791 /* FlattenOp.swift */; }; + 4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */ = {isa 
= PBXBuildFile; fileRef = 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */; }; + 4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */; }; + 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */; }; + 4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */; }; + 4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA5214B5F6800D0F791 /* Shape.metal */; }; + 4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */; }; + 4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */; }; + 4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */; }; + 4AA1EAAE214F5FD900D0F791 /* TransposeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */; }; + 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928762133F1DB005B6C3A /* BoxCoder.metal */; }; + 4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; }; + 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928812135673D005B6C3A /* ConcatKernel.metal */; }; + 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9288321357BE3005B6C3A /* Elementwise.metal */; }; D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 
DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; }; + FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC0226552138F33800F395E2 /* TransposeKernel.metal */; }; + FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC0226572138F38D00F395E2 /* PoolKernel.metal */; }; FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; }; FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9520E11C9A0081E9F8 /* Errors.swift */; }; @@ -35,17 +59,54 @@ FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; }; FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; }; FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; }; - FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1B186520ECF1C600678B91 /* ResizeKernel.swift */; }; + FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */; settings = {ATTRIBUTES = (Public, ); }; }; + FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */ = {isa = PBXBuildFile; fileRef = FC292C5521421B4600CF622F /* PaddleMobileGPU.m */; }; + FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7C214255BC00CF622F /* CPUCompute.mm */; }; + FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */ = {isa 
= PBXBuildFile; fileRef = FC292C7E214255BC00CF622F /* MobileNetSSD.swift */; }; + FC292C85214257CB00CF622F /* CPUCompute.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C7D214255BC00CF622F /* CPUCompute.h */; settings = {ATTRIBUTES = (Public, ); }; }; + FC292C872142624800CF622F /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C862142624800CF622F /* Genet.swift */; }; + FC33B0F02147659000714A93 /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC33B0EF2147659000714A93 /* MobileNet.swift */; }; FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; }; FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; }; FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; }; + FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */; }; + FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */; settings = {ATTRIBUTES = (Public, ); }; }; + FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; }; + FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97D2140F2C30073E130 /* libstdc++.tbd */; }; FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; }; FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; }; + FC803BBF214CB65A0094B8E5 /* 
ConvAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */; }; + FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */; }; + FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */; }; + FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */; }; + FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC6214CBA820094B8E5 /* Macro.metal */; }; + FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */; }; FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; }; + FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */; }; FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; }; FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; }; FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; }; FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; }; + FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */; }; + FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */ = {isa = PBXBuildFile; fileRef = 
FCA3A1642132A5EB00084FE5 /* Common.metal */; }; + FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */; }; + FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD42138272900BD58AA /* ConvAddMetal.metal */; }; + FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; }; + FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; }; + FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; }; + FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; }; + FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; }; + FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */; }; + FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */; }; + FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */; }; + FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */; }; + FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC642122FCD700D94F7E /* TransposeOp.swift */; }; + FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 
FCBCCC66212306B000D94F7E /* ConcatOp.swift */; }; + FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC68212306D300D94F7E /* ConcatKernel.swift */; }; + FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */; }; + FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; }; + FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; }; + FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; }; FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; }; FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; }; FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; }; @@ -55,15 +116,55 @@ FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; }; FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; }; FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; }; + FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */; }; + FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */; 
}; + FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */; }; + FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; }; + FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; }; + FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; }; + FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */; }; + FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */; }; + FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */; }; + FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */; }; + FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */; }; + FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */; }; + FCE9D7B7214F869000B520C3 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B6214F869000B520C3 /* Net.swift */; }; + FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; }; + FCEB684A212F00DB00D2448E /* PreluKernel.metal in 
Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; }; + FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; }; FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; }; FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; }; FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ + 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = ""; }; + 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpKernel.swift; sourceTree = ""; }; + 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = ""; }; + 4AA1EA8B2146640900D0F791 /* SplitOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SplitOp.swift; sourceTree = ""; }; + 4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SplitKernel.swift; sourceTree = ""; }; + 4AA1EA8F214664CD00D0F791 /* Split.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Split.metal; sourceTree = ""; }; + 4AA1EA91214665D700D0F791 /* ShapeOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.swift; path = ShapeOp.swift; sourceTree = ""; }; + 4AA1EA932146661500D0F791 /* ShapeKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ShapeKernel.swift; sourceTree = ""; }; + 4AA1EA972146666500D0F791 /* FlattenOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FlattenOp.swift; sourceTree = ""; }; + 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; fileEncoding = 4; path = ConcatKernel.inc.metal; sourceTree = ""; }; + 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; fileEncoding = 4; path = ReshapeKernel.inc.metal; sourceTree = ""; }; + 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FlattenKernel.swift; sourceTree = ""; }; + 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Split.inc.metal; sourceTree = ""; }; + 4AA1EAA5214B5F6800D0F791 /* Shape.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Shape.metal; sourceTree = ""; }; + 4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.inc.metal; sourceTree = ""; }; + 4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.inc.metal; sourceTree = ""; }; + 4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.inc.metal; sourceTree = ""; }; + 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.metal; path = TransposeKernel.inc.metal; sourceTree = ""; }; + 4AF928762133F1DB005B6C3A /* BoxCoder.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.metal; sourceTree = ""; }; + 4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = ""; }; + 4AF928812135673D005B6C3A /* ConcatKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = ConcatKernel.metal; sourceTree = ""; }; + 4AF9288321357BE3005B6C3A /* Elementwise.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Elementwise.metal; sourceTree = ""; }; CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = ""; }; DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = ""; }; + FC0226552138F33800F395E2 /* TransposeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = TransposeKernel.metal; sourceTree = ""; }; + FC0226572138F38D00F395E2 /* PoolKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PoolKernel.metal; sourceTree = ""; }; FC039B6A20E11C3C0081E9F8 /* 
paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = paddle_mobile.h; sourceTree = ""; }; FC039B6E20E11C3C0081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; @@ -93,17 +194,54 @@ FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = ""; }; FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = ""; }; FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = ""; }; - FC1B186520ECF1C600678B91 /* ResizeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ResizeKernel.swift; sourceTree = ""; }; + FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileGPU.h; sourceTree = ""; }; + FC292C5521421B4600CF622F /* PaddleMobileGPU.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = PaddleMobileGPU.m; sourceTree = ""; }; + FC292C7C214255BC00CF622F /* CPUCompute.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CPUCompute.mm; sourceTree = ""; }; + FC292C7D214255BC00CF622F /* CPUCompute.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = ""; }; + FC292C7E214255BC00CF622F /* MobileNetSSD.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = ""; }; + 
FC292C862142624800CF622F /* Genet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = ""; }; + FC33B0EF2147659000714A93 /* MobileNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = ""; }; FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = ""; }; FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = ""; }; FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = ""; }; + FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = ""; }; + FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileCPU.h; sourceTree = ""; }; + FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = ""; }; + FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; }; FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = ""; }; FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = ""; }; + FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluOp.swift; sourceTree = ""; }; + FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluKernel.swift; sourceTree = ""; }; + FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPreluKernel.metal; sourceTree = ""; }; + FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPrelu.inc.metal; sourceTree = ""; }; + FC803BC6214CBA820094B8E5 /* Macro.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Macro.metal; sourceTree = ""; }; + FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.metal; sourceTree = ""; }; FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = ""; }; + FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = ""; }; FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = ""; }; FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = ""; }; FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = ""; }; FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = ""; }; + FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = 
ReshapeKernel.metal; sourceTree = ""; }; + FCA3A1642132A5EB00084FE5 /* Common.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Common.metal; sourceTree = ""; }; + FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvTransposeKernel.metal; sourceTree = ""; }; + FCA67CD42138272900BD58AA /* ConvAddMetal.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddMetal.metal; sourceTree = ""; }; + FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = ""; }; + FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = ""; }; + FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = ""; }; + FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = ""; }; + FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = ""; }; + FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DepthwiseConvOp.swift; sourceTree = ""; }; + FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PriorBoxOp.swift; sourceTree = ""; }; + FCBCCC602122FBDF00D94F7E /* PriorBoxKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PriorBoxKernel.swift; sourceTree = ""; }; + FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.swift; path = TransposeKernel.swift; sourceTree = ""; }; + FCBCCC642122FCD700D94F7E /* TransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TransposeOp.swift; sourceTree = ""; }; + FCBCCC66212306B000D94F7E /* ConcatOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConcatOp.swift; sourceTree = ""; }; + FCBCCC68212306D300D94F7E /* ConcatKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConcatKernel.swift; sourceTree = ""; }; + FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderOp.swift; sourceTree = ""; }; + FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = ""; }; + FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = ""; }; + FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = ""; }; FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = ""; }; FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = ""; }; FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = ""; }; @@ -113,9 +251,25 @@ FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = ""; }; FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
ConvAddKernel.swift; sourceTree = ""; }; FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = ""; }; + FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluKernel.swift; sourceTree = ""; }; + FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeKernel.swift; sourceTree = ""; }; + FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = BatchNormKernel.metal; sourceTree = ""; }; + FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = ""; }; + FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = ""; }; + FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = ""; }; + FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluOp.swift; sourceTree = ""; }; + FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluKernel.swift; sourceTree = ""; }; + FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluOp.swift; sourceTree = ""; }; + FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluKernel.swift; sourceTree = ""; }; + FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */ = 
{isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.inc.metal; sourceTree = ""; }; + FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.metal; sourceTree = ""; }; + FCE9D7B6214F869000B520C3 /* Net.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = ""; }; + FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = ""; }; + FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = ""; }; + FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = ""; }; FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; }; FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = ""; }; - FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; }; + FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -123,7 +277,9 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; 
files = ( + FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */, D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */, + FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -133,6 +289,7 @@ 336CBE234BF5DE48658DE65F /* Frameworks */ = { isa = PBXGroup; children = ( + FC4FD97D2140F2C30073E130 /* libstdc++.tbd */, DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */, ); name = Frameworks; @@ -168,10 +325,19 @@ FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = { isa = PBXGroup; children = ( + FCE9D7B6214F869000B520C3 /* Net.swift */, + FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */, + FC33B0EF2147659000714A93 /* MobileNet.swift */, + FC292C862142624800CF622F /* Genet.swift */, + FC292C7E214255BC00CF622F /* MobileNetSSD.swift */, + FC292C7C214255BC00CF622F /* CPUCompute.mm */, + FC292C7D214255BC00CF622F /* CPUCompute.h */, + FC292C5521421B4600CF622F /* PaddleMobileGPU.m */, + FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */, + FC4FD9762140E4920073E130 /* CPU */, + FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */, FC039BAE20E11CC20081E9F8 /* Program */, FC039BA320E11CBC0081E9F8 /* Operators */, - FC039BA120E11CB70081E9F8 /* Loader.swift */, - FC039B9A20E11CA00081E9F8 /* Executor.swift */, FC039B9C20E11CB20081E9F8 /* framework */, FC039B9320E11C9A0081E9F8 /* Common */, FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */, @@ -196,6 +362,8 @@ FC039B9C20E11CB20081E9F8 /* framework */ = { isa = PBXGroup; children = ( + FC039BA120E11CB70081E9F8 /* Loader.swift */, + FC039B9A20E11CA00081E9F8 /* Executor.swift */, FC039B9D20E11CB20081E9F8 /* Tensor.swift */, FC039B9E20E11CB20081E9F8 /* Dim.swift */, FC9D038320E23B01000F735A /* Texture.swift */, @@ -219,6 +387,23 @@ FCD04E6920F319EC0007374F /* SoftmaxOp.swift */, FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */, FCD04E7120F343420007374F /* ConvAddOp.swift */, + FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */, + 
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */, + FCBCCC5C2122F8A100D94F7E /* DepthwiseConvOp.swift */, + FCBCCC5E2122FB3B00D94F7E /* PriorBoxOp.swift */, + FCBCCC642122FCD700D94F7E /* TransposeOp.swift */, + FCBCCC66212306B000D94F7E /* ConcatOp.swift */, + FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */, + 4AA1EA8B2146640900D0F791 /* SplitOp.swift */, + 4AA1EA91214665D700D0F791 /* ShapeOp.swift */, + 4AA1EA972146666500D0F791 /* FlattenOp.swift */, + 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */, + FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */, + FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */, + FCEB684B212F093800D2448E /* PreluOp.swift */, + FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */, + FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */, + FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */, ); path = Operators; sourceTree = ""; @@ -243,24 +428,46 @@ FC086BA520E67E8500D85EF7 /* Kernels */ = { isa = PBXGroup; children = ( + FCDDC6CD212FE02100E5EF74 /* Base */, + FCEB6837212F00B100D2448E /* metal */, + FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */, FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */, - FCF2D73720E64E70007AC5F5 /* Kernel.swift */, - FC1B16B220EC9A4F00678B91 /* Kernels.metal */, - FC1B186520ECF1C600678B91 /* ResizeKernel.swift */, FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */, FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */, FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */, FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */, - FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */, FCD04E6720F315020007374F /* PoolKernel.swift */, FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */, FCD04E6F20F31B720007374F /* ReshapeKernel.swift */, + 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */, FCD04E7320F3437E0007374F /* ConvAddKernel.swift */, + FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */, + FCBCCC602122FBDF00D94F7E 
/* PriorBoxKernel.swift */, + FCBCCC622122FCC000D94F7E /* TransposeKernel.swift */, + FCBCCC68212306D300D94F7E /* ConcatKernel.swift */, + FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */, + 4AA1EA8D2146647F00D0F791 /* SplitKernel.swift */, + 4AA1EA932146661500D0F791 /* ShapeKernel.swift */, + 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */, + FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */, + FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */, + FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */, + FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */, + FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */, ); path = Kernels; sourceTree = ""; }; + FC4FD9762140E4920073E130 /* CPU */ = { + isa = PBXGroup; + children = ( + FC4FD9782140E4980073E130 /* libpaddle-mobile.a */, + FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */, + ); + path = CPU; + sourceTree = ""; + }; FCD592FA20E248EC00252966 /* Base */ = { isa = PBXGroup; children = ( @@ -271,6 +478,56 @@ path = Base; sourceTree = ""; }; + FCDDC6CD212FE02100E5EF74 /* Base */ = { + isa = PBXGroup; + children = ( + FCF2D73720E64E70007AC5F5 /* Kernel.swift */, + ); + path = Base; + sourceTree = ""; + }; + FCEB6837212F00B100D2448E /* metal */ = { + isa = PBXGroup; + children = ( + 4AF928812135673D005B6C3A /* ConcatKernel.metal */, + 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */, + 4AF9288321357BE3005B6C3A /* Elementwise.metal */, + FC1B16B220EC9A4F00678B91 /* Kernels.metal */, + FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, + 4AF928762133F1DB005B6C3A /* BoxCoder.metal */, + 4AA1EAA9214F53D800D0F791 /* BoxCoder.inc.metal */, + 4AA1EAA5214B5F6800D0F791 /* Shape.metal */, + 4AA1EA8F214664CD00D0F791 /* Split.metal */, + 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */, + 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */, + 4AA1EAA7214B7AFB00D0F791 /* BilinearInterp.inc.metal */, + 4AF9287821341661005B6C3A /* Softmax.metal */, + 4AA1EAAB214F55C800D0F791 /* Softmax.inc.metal 
*/, + FCEB6849212F00DB00D2448E /* PreluKernel.metal */, + FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */, + FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */, + FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */, + FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */, + 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */, + FCA3A1642132A5EB00084FE5 /* Common.metal */, + FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */, + FCA67CD42138272900BD58AA /* ConvAddMetal.metal */, + FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */, + FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */, + FC0226552138F33800F395E2 /* TransposeKernel.metal */, + 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */, + FC0226572138F38D00F395E2 /* PoolKernel.metal */, + FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */, + FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */, + FC803BC6214CBA820094B8E5 /* Macro.metal */, + FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */, + FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */, + FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */, + FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */, + ); + path = metal; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXHeadersBuildPhase section */ @@ -278,6 +535,10 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */, + FC292C85214257CB00CF622F /* CPUCompute.h in Headers */, + FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */, + 4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */, FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -315,6 +576,7 @@ TargetAttributes = { FC039B6920E11C3C0081E9F8 = { CreatedOnToolsVersion = 9.3.1; + LastSwiftMigration = 0940; }; }; }; @@ -372,53 +634,124 @@ buildActionMask = 2147483647; files = ( FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */, 
+ 4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */, FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */, + FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */, + FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */, + 4AF9287921341661005B6C3A /* Softmax.metal in Sources */, + 4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */, FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */, FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */, + FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */, + FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */, + 4AA1EAAE214F5FD900D0F791 /* TransposeKernel.inc.metal in Sources */, + 4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */, + FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */, FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */, + 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, + FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */, FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */, FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */, + FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */, FC9D037920E229E4000F735A /* OpParam.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, - FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */, FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */, + FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */, + FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */, + FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */, + FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */, + FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, + 4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */, + FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */, FCEBC0F420F1FDD90099DBAF /* 
ConvAddBatchNormReluOp.swift in Sources */, + 4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, + 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */, + FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */, + FC33B0F02147659000714A93 /* MobileNet.swift in Sources */, + FCEB684C212F093800D2448E /* PreluOp.swift in Sources */, + 4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */, + FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */, FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */, FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */, + 4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */, + FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */, FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */, FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */, FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */, + FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */, + FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */, + FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */, FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */, FC9D038420E23B01000F735A /* Texture.swift in Sources */, + FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */, + 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */, + 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */, + FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */, + 4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */, FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */, FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */, FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */, + 
4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */, FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */, FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */, + FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */, FCD04E6620F314C50007374F /* PoolOp.swift in Sources */, + FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */, FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */, + FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */, FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */, + FC292C872142624800CF622F /* Genet.swift in Sources */, + FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */, + 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */, + FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */, + FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */, FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */, FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */, + 4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */, + FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */, + FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */, + FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */, FC82735920E3C04200BE430A /* OpCreator.swift in Sources */, + FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */, + 4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */, + FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */, + FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */, + FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */, + FCE9D7B7214F869000B520C3 /* Net.swift in Sources */, FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */, FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */, FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */, + FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */, + FCBCCC612122FBDF00D94F7E /* 
PriorBoxKernel.swift in Sources */, + FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */, FC9D038220E2312E000F735A /* FetchOp.swift in Sources */, + FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */, FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */, FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */, + FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */, FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */, + FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */, + 4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */, + FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */, + FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */, FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */, + FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, + FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */, FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, + FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */, + 4AA1EA90214664CD00D0F791 /* Split.metal in Sources */, FCD04E6820F315020007374F /* PoolKernel.swift in Sources */, + FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */, FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */, + FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */, FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */, + 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */, FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -550,6 +883,7 @@ isa = XCBuildConfiguration; baseConfigurationReference = CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */; buildSettings = { + CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = ""; CODE_SIGN_STYLE = Automatic; DEFINES_MODULE = YES; @@ -557,6 +891,7 @@ DYLIB_COMPATIBILITY_VERSION = 1; 
DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; + ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile/Info.plist"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; @@ -565,10 +900,16 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/paddle-mobile/CPU", + ); + MACH_O_TYPE = mh_dylib; MTL_LANGUAGE_REVISION = UseDeploymentTarget; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; SKIP_INSTALL = YES; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -578,6 +919,7 @@ isa = XCBuildConfiguration; baseConfigurationReference = E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */; buildSettings = { + CLANG_ENABLE_MODULES = YES; CODE_SIGN_IDENTITY = ""; CODE_SIGN_STYLE = Automatic; DEFINES_MODULE = YES; @@ -585,6 +927,7 @@ DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_CURRENT_VERSION = 1; DYLIB_INSTALL_NAME_BASE = "@rpath"; + ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile/Info.plist"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; IPHONEOS_DEPLOYMENT_TARGET = 9.0; @@ -593,6 +936,11 @@ "@executable_path/Frameworks", "@loader_path/Frameworks", ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/paddle-mobile/CPU", + ); + MACH_O_TYPE = mh_dylib; MTL_LANGUAGE_REVISION = UseDeploymentTarget; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme deleted file mode 100644 index 7c83f42ceca9f68af4f45064cb29c9e3a3512b8e..0000000000000000000000000000000000000000 --- 
a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme +++ /dev/null @@ -1,80 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist deleted file mode 100644 index 067e2a5bea9382a8f2ffebfd809d2c8217631975..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist +++ /dev/null @@ -1,22 +0,0 @@ - - - - - SchemeUserState - - paddle-mobile.xcscheme - - orderHint - 0 - - - SuppressBuildableAutocreation - - FC039B6920E11C3C0081E9F8 - - primary - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/CPU/PaddleMobileCPU.h b/metal/paddle-mobile/paddle-mobile/CPU/PaddleMobileCPU.h new file mode 100644 index 0000000000000000000000000000000000000000..c68d81f328f4ce9a9bf16624f677b2996644c35c --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/CPU/PaddleMobileCPU.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once + +#import +#import + +@interface PaddleMobileCPUResult: NSObject + +@property (assign, nonatomic, readonly) float *output; + +@property (assign, nonatomic, readonly) int outputSize; + +-(void)releaseOutput; + +@end + +@interface PaddleMobileCPU : NSObject + +/* + 创建对象 +*/ +- (instancetype)init; + +/* + load 模型, 开辟内存 +*/ +- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath; + +/* + 加载散开形式的模型, 需传入模型的目录 +*/ +- (BOOL)load:(NSString *)modelAndWeightPath; + +/* + * 从内存中加载模型 + * */ +- (BOOL)LoadCombinedMemory:(size_t)modelLen + andModelBuf:(const uint8_t *)modelBuf + andModelParamsLen:(size_t)combinedParamsLen + andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf; + +/* + * 对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存 + * */ +-(void)preprocess:(CGImageRef)image + output:(float *)output + means:(NSArray *)means + scale:(float)scale + dim:(NSArray *)dim; + +/* + * 预测预处理后的数据, 返回结果使用结束需要调用其 realseOutput 函数进行释放 + * */ +- (PaddleMobileCPUResult *)predictInput:(float *)input + dim:(NSArray *)dim; + +/* + 进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict +*/ +- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale; + +/* + 进行预测, 默认 means 为 0, scale 为 1.0 +*/ +- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim; + +/* + 清理内存 +*/ +- (void)clear; + +@end diff --git a/metal/paddle-mobile/paddle-mobile/CPUCompute.h b/metal/paddle-mobile/paddle-mobile/CPUCompute.h new file mode 100644 index 0000000000000000000000000000000000000000..ed12dd60df4ea06944fdf4ff9b635fc12a99120e --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/CPUCompute.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#import + + +@interface CPUResult: NSObject +@property (assign, nonatomic) float *output; +@property (assign, nonatomic) int outputSize; +@end + +@interface NMSCompute: NSObject + +@property (assign, nonatomic) float scoreThredshold; + +@property (assign, nonatomic) int nmsTopK; + +@property (assign, nonatomic) int keepTopK; + +@property (assign, nonatomic) float nmsEta; + +@property (assign, nonatomic) float nmsThreshold; + +@property (assign, nonatomic) int background_label; + +@property (strong, nonatomic) NSArray *scoreDim; + +@property (strong, nonatomic) NSArray *bboxDim; + +-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox; + +@end diff --git a/metal/paddle-mobile/paddle-mobile/CPUCompute.mm b/metal/paddle-mobile/paddle-mobile/CPUCompute.mm new file mode 100644 index 0000000000000000000000000000000000000000..b97153765b46bb63d604d8845eee08d91283481d --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/CPUCompute.mm @@ -0,0 +1,322 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + + +#import "CPUCompute.h" + +#import +#import +#import +#import + + + + +struct NMSParam { + + float *score_data; + + float *box_data; + + float *output; + + int output_size; + + std::vector score_dim; + + std::vector box_dim; + + float scoreThredshold; + + int nmsTopK; + + int keepTopK; + + float nmsEta; + + float nmsThreshold; + + int background_label; +}; + + +constexpr int kOutputDim = 6; +constexpr int kBBoxSize = 4; + +template +bool SortScorePairDescend(const std::pair& pair1, + const std::pair& pair2) { + return pair1.first > pair2.first; +} + +template +static inline void GetMaxScoreIndex( + const std::vector& scores, const T threshold, int top_k, + std::vector>* sorted_indices) { + for (size_t i = 0; i < scores.size(); ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. + if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +template +static inline T BBoxArea(const T* box, const bool normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return static_cast(0.); + } else { + const T w = box[2] - box[0]; + const T h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. 
+ return (w + 1) * (h + 1); + } + } +} + +template +static inline T JaccardOverlap(const T* box1, const T* box2, + const bool normalized) { + if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1]) { + return static_cast(0.); + } else { + const T inter_xmin = std::max(box1[0], box2[0]); + const T inter_ymin = std::max(box1[1], box2[1]); + const T inter_xmax = std::min(box1[2], box2[2]); + const T inter_ymax = std::min(box1[3], box2[3]); + const T inter_w = inter_xmax - inter_xmin; + const T inter_h = inter_ymax - inter_ymin; + const T inter_area = inter_w * inter_h; + const T bbox1_area = BBoxArea(box1, normalized); + const T bbox2_area = BBoxArea(box2, normalized); + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +template +static inline void NMSFast( + const T *bbox_data, + std::vector bbox_dim, + const T *score_data, + const T score_threshold, const T nms_threshold, + const T eta, const int top_k, + std::vector* selected_indices) { + // The total boxes for each instance. 
+ int num_boxes = bbox_dim[0]; + // 4: [xmin ymin xmax ymax] + int box_size = bbox_dim[1]; + + std::vector scores_data(num_boxes); + std::copy_n(score_data, num_boxes, scores_data.begin()); + std::vector> sorted_indices; + GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices); + + selected_indices->clear(); + T adaptive_threshold = nms_threshold; + + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < selected_indices->size(); ++k) { + if (keep) { + const int kept_idx = (*selected_indices)[k]; + T overlap = JaccardOverlap(bbox_data + idx * box_size, + bbox_data + kept_idx * box_size, true); + keep = overlap <= adaptive_threshold; + } else { + break; + } + } + if (keep) { + selected_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= eta; + } + } +} + +template +void MultiClassNMS(const T *boxes_data, + const std::vector &box_dim, + const T *scores_data, + const std::vector &score_dim, + std::map>* indices, int* num_nmsed_out, + const int& background_label, const int& nms_top_k, + const int& keep_top_k, const T& nms_threshold, + const T& nms_eta, const T& score_threshold) { + + int64_t class_num = score_dim[0]; + int64_t predict_dim = score_dim[1]; + int num_det = 0; + for (int c = 0; c < class_num; ++c) { + if (c == background_label) continue; + const T *score_data = scores_data + c * predict_dim; + + /// [c] is key + NMSFast(boxes_data, box_dim, score_data, score_threshold, nms_threshold, nms_eta, + nms_top_k, &((*indices)[c])); + num_det += (*indices)[c].size(); + } + + *num_nmsed_out = num_det; + if (keep_top_k > -1 && num_det > keep_top_k) { + std::vector>> score_index_pairs; + for (const auto& it : *indices) { + int label = it.first; + const T* sdata = scores_data + label * predict_dim; + const std::vector& label_indices = it.second; + for (size_t j = 0; j < 
label_indices.size(); ++j) { + int idx = label_indices[j]; + // PADDLE_ENFORCE_LT(idx, predict_dim); + score_index_pairs.push_back(std::make_pair(sdata[idx], std::make_pair(label, idx))); + } + } + // Keep top k results per image. + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(keep_top_k); + + // Store the new indices. + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + new_indices.swap(*indices); + *num_nmsed_out = keep_top_k; + } +} + +template +void MultiClassOutput(const T *scores_data, + const std::vector &score_dim, + const T *bboxes_data, + T *outputs_data, + const std::map>& selected_indices) { + int predict_dim = score_dim[1]; + int count = 0; + for (const auto& it : selected_indices) { + /// one batch + int label = it.first; + const T* sdata = scores_data + label * predict_dim; + const std::vector& indices = it.second; + for (size_t j = 0; j < indices.size(); ++j) { + int idx = indices[j]; + const T* bdata = bboxes_data + idx * kBBoxSize; + outputs_data[count * kOutputDim] = label; // label + outputs_data[count * kOutputDim + 1] = sdata[idx]; // score + // xmin, ymin, xmax, ymax + std::memcpy(outputs_data + count * kOutputDim + 2, bdata, 4 * sizeof(T)); + count++; + } + } +} + +void MultiClassNMSCompute(NMSParam *param) { + assert(param->score_dim[0] == 1); + assert(param->box_dim[0] == 1); + assert (param->score_dim.size() == 3); + assert(param->box_dim.size() == 3); + + float* outputs; + auto background_label = param->background_label; + auto nms_top_k = param->nmsTopK; + auto keep_top_k = param->keepTopK; + auto nms_threshold = param->nmsThreshold; + auto nms_eta = param->nmsEta; + auto score_threshold = param->scoreThredshold; + + std::vector score_dim_one_batch = {param->score_dim[1], param->score_dim[2]}; 
+ std::vector box_dim_one_batch = {param->box_dim[1], param->box_dim[2]}; + + std::vector batch_starts = {0}; + + std::map> indices; + int num_nmsed_out = 0; + + MultiClassNMS(param->box_data, box_dim_one_batch, param->score_data, score_dim_one_batch, &indices, &num_nmsed_out, + background_label, nms_top_k, keep_top_k, nms_threshold, + nms_eta, score_threshold); + batch_starts.push_back(batch_starts.back() + num_nmsed_out); + + int output_size = 0; + int num_kept = batch_starts.back(); + if (num_kept == 0) { + outputs = new float[1]; + outputs[0] = -1; + output_size = 1; + } else { + outputs = new float[num_kept * kOutputDim]; + int64_t s = batch_starts[0]; + int64_t e = batch_starts[1]; + if (e > s) { + MultiClassOutput(param->score_data, score_dim_one_batch, param->box_data, outputs, indices); + } + output_size = num_kept * kOutputDim; + } + param->output = outputs; + param->output_size = output_size; +} + +@implementation CPUResult +@end + +@implementation NMSCompute + +-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox { + NMSParam param; + param.box_data = bbox; + param.score_data = score; + param.background_label = self.background_label; + param.scoreThredshold = self.scoreThredshold; + param.nmsTopK = self.nmsTopK; + param.keepTopK = self.keepTopK; + param.nmsEta = self.nmsEta; + param.nmsThreshold = self.nmsThreshold; + std::vector score_dim; + for (int i = 0; i < self.scoreDim.count; ++i) { + score_dim.push_back(self.scoreDim[i].intValue); + } + param.score_dim = score_dim; + + std::vector box_dim; + for (int i = 0; i < self.bboxDim.count; ++i) { + box_dim.push_back(self.bboxDim[i].intValue); + } + param.box_dim = box_dim; + MultiClassNMSCompute(¶m); + CPUResult *cr = [[CPUResult alloc] init]; + cr.output = param.output; + cr.outputSize = param.output_size; + return cr; +} + +@end + + diff --git a/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift b/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift index 
62954ede17d493ae12aa104d13a75dbc062e98a0..4c38a1b7b42e21f88b3b1c8825c181bb83293a54 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift @@ -16,95 +16,110 @@ import Foundation // 自定义 ?! 如果 ?! 前的返回值为一个可选值, 则进行隐式解包, 如果有值则返回这个值, 如果为nil 则fatalError 传入的信息 precedencegroup ExecutedOrFatalError{ - associativity: left - higherThan: AssignmentPrecedence + associativity: left + higherThan: AssignmentPrecedence } infix operator ?!: ExecutedOrFatalError public func ?!(option: T?, excuteOrError: @autoclosure () -> String) -> T{ - if let inOpt = option { - return inOpt - }else{ - print(excuteOrError()) - fatalError(excuteOrError()) - } + if let inOpt = option { + return inOpt + }else{ + print(excuteOrError()) + fatalError(excuteOrError()) + } } //Lense struct Lense { - let from: (A) -> B - let to: (B, A) -> A + let from: (A) -> B + let to: (B, A) -> A } precedencegroup CombineLense{ - associativity: left - higherThan: AssignmentPrecedence + associativity: left + higherThan: AssignmentPrecedence } infix operator >>>: CombineLense func >>>(left: Lense, right: Lense) -> Lense { - return Lense.init(from: { (a) -> C in - left.from(right.from(a)) - }, to: { (c, a) -> A in - right.to( left.to(c, right.from(a)),a) - }) + return Lense.init(from: { (a) -> C in + left.from(right.from(a)) + }, to: { (c, a) -> A in + right.to( left.to(c, right.from(a)),a) + }) } protocol CIntIndex { - associatedtype T; - subscript(index: CInt) -> T { get set}; + associatedtype T; + subscript(index: CInt) -> T { get set}; } extension Array: CIntIndex{ - typealias T = Element - subscript(index: CInt) -> T { - get{ - guard Int64(Int.max) >= Int64(index) else{ - fatalError("cint index out of Int range") - } - return self[Int(index)] - } - set{ - guard Int64(Int.max) >= Int64(index) else{ - fatalError("cint index out of Int range") - } - self[Int(index)] = newValue - } - + typealias T = Element + subscript(index: CInt) -> T { + get{ + 
guard Int64(Int.max) >= Int64(index) else{ + fatalError("cint index out of Int range") + } + return self[Int(index)] + } + set{ + guard Int64(Int.max) >= Int64(index) else{ + fatalError("cint index out of Int range") + } + self[Int(index)] = newValue } + + } } extension Array where Element: AnyObject{ - mutating func remove(element: Element) { - if let index = index(where: { (node) -> Bool in - return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self) - }) { - remove(at: index) - } + mutating func remove(element: Element) { + if let index = index(where: { (node) -> Bool in + return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self) + }) { + remove(at: index) } - + } + } //MARK: Array extension extension Array where Element: Comparable{ - - /// 返回数组前 r 个元素, 并将元素处于原数组的位置作为元组的第一个元素返回 - /// - /// - Parameter r: 前 r 个元素 - /// - Returns: [(原有位置, 排好位置的元素)] - public func top(r: Int) -> [(Int, Element)] { - precondition(r <= self.count) - return Array<(Int, Element)>(zip(0.. $1.1 }.prefix(through: r - 1)) + + /// 返回数组前 r 个元素, 并将元素处于原数组的位置作为元组的第一个元素返回 + /// + /// - Parameter r: 前 r 个元素 + /// - Returns: [(原有位置, 排好位置的元素)] + public func top(r: Int) -> [(Int, Element)] { + precondition(r <= self.count) + return Array<(Int, Element)>(zip(0.. $1.1 }.prefix(through: r - 1)) + } +} + +extension Array { + public func strideArray(inCount: Int = 20) -> [(Int, Element)] { + if count < inCount { + return (0.. UnsafePointer? { - return (self as NSString).utf8String - } + func cStr() -> UnsafePointer? 
{ + return (self as NSString).utf8String + } } func address(o: T) -> String { - return String.init(format: "%018p", unsafeBitCast(o, to: Int.self)) + return String.init(format: "%018p", unsafeBitCast(o, to: Int.self)) } diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift index b750018260f64ae89f5b3aab5cc987eee9a11415..3be8c118613b3e9d6a9247fd731cc74392392d5b 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift @@ -18,263 +18,588 @@ fileprivate var defaultMetalLibrary: MTLLibrary? fileprivate var paddleMobileMetalLibrary: MTLLibrary? extension MTLDevice { - func defaultLibrary() -> MTLLibrary { - if defaultMetalLibrary == nil { - defaultMetalLibrary = makeDefaultLibrary() - } - if let inDefaultLib = defaultMetalLibrary { - return inDefaultLib - } else { - fatalError(" default metal libary is nil") - } + func defaultLibrary() -> MTLLibrary { + if defaultMetalLibrary == nil { + defaultMetalLibrary = makeDefaultLibrary() + } + if let inDefaultLib = defaultMetalLibrary { + return inDefaultLib + } else { + fatalError(" default metal libary is nil") + } + } + + func paddleMobileLibrary() -> MTLLibrary { + if paddleMobileMetalLibrary == nil { + guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else { + fatalError("Counld't find paddle mobile library") + } + do { + paddleMobileMetalLibrary = try makeLibrary(filepath: path) + } catch _ { + fatalError("Counld't load paddle mobile library") + } } - func paddleMobileLibrary() -> MTLLibrary { - if paddleMobileMetalLibrary == nil { - guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else { - fatalError("Counld't find paddle mobile library") - } - do { - paddleMobileMetalLibrary = try makeLibrary(filepath: path) - } catch _ { - fatalError("Counld't load paddle mobile 
library") - } - } - - if let inPaddleMobileLib = paddleMobileMetalLibrary { - return inPaddleMobileLib - } else { - fatalError("PaddleMobile metal libary is nil") - } + if let inPaddleMobileLib = paddleMobileMetalLibrary { + return inPaddleMobileLib + } else { + fatalError("PaddleMobile metal libary is nil") + } + } + + func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState { + let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary() + guard let function = useLib.makeFunction(name: funcName) else { + fatalError(" function " + funcName + " not found") + } + do { + let pipLine = try makeComputePipelineState(function: function) + return pipLine + } catch let error { + print(error) + fatalError("make pip line error occured : \(error)") } - func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState { - let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary() - guard let function = useLib.makeFunction(name: funcName) else { - fatalError(" function " + funcName + " not found") - } - do { - let pipLine = try makeComputePipelineState(function: function) - return pipLine - } catch _ { - fatalError("make pip line error occured") - } - + } + + func makeBuffer

(value: [P]) -> MTLBuffer { + let buffer = makeBuffer(length: value.count * MemoryLayout

.size, options: MTLResourceOptions.storageModeShared) + let contents = buffer?.contents().bindMemory(to: P.self, capacity: value.count * MemoryLayout

.size) + for i in 0..(texture: MTLTexture, cb: ([Int], P)->Void) -> Void { + let bpR = texture.width * 4 * MemoryLayout

.size + let bpI = texture.height * bpR + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: 1)) + for i in 0.. = UnsafeMutablePointer

.allocate(capacity: bpI) + texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i) + for tx in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + var tdim: [Int] = [1, 1, 1, 1] + for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + var tdim: [Int] = [1, 1, 1, 1] + for i in 0..(value: [P]) -> MTLBuffer { - let buffer = makeBuffer(length: value.count * MemoryLayout

.size, options: MTLResourceOptions.storageModeShared) - let contents = buffer?.contents().bindMemory(to: P.self, capacity: value.count * MemoryLayout

.size) - for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + var tdim: [Int] = [1, 1, 1, 1] + for i in 0..(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{ - - let textureDesc = MTLTextureDescriptor.init() - textureDesc.width = textureWidth - textureDesc.height = textureHeight - textureDesc.depth = 1 - textureDesc.usage = [.shaderRead, .shaderWrite] - textureDesc.pixelFormat = .rgba32Float - textureDesc.textureType = .type2DArray - textureDesc.storageMode = .shared - textureDesc.cpuCacheMode = .defaultCache - textureDesc.arrayLength = arrayLength - let texture = makeTexture(descriptor: textureDesc)! - - if arrayLength == 1 && value.count >= 4{ - let pointer: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: value.count * MemoryLayout

.size) - for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + if dim.count == 3 { + return texture2tensor_3(texture: texture, dim: dim, transpose: transpose) + } else if dim.count == 2 { + return texture2tensor_2(texture: texture, dim: dim, transpose: transpose) + } else if dim.count == 1 { + return texture2tensor_1(texture: texture, dim: dim, transpose: transpose) + } + var tdim: [Int] = [1, 1, 1, 1] + for i in 0..(value: [P], dim: [Int], transpose: [Int] = [0, 1, 2, 3], inComputePrecision: ComputePrecision = .Float32) -> MTLTexture { + if value.count > 0 { + assert(value.count == dim.reduce(1) { $0 * $1 }) + } + + var tdim: [Int] = [1, 1, 1, 1] + for i in 0.. 0 { + var rcount: Int = (ndim[0] * ndim[3] + 3) / 4 + rcount = rcount * 4 * ndim[1] * ndim[2] + var nvalue: [Float32] = .init(repeating: 0.0, count: rcount) + + for i0 in 0...size - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth)) - texture.replace(region: region, mipmapLevel: 0, withBytes: pointer, bytesPerRow: bytesPerRow) - } else { - - - + } } - - return texture + } + + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: ndim[2], height: ndim[1], depth: 1)) + if inComputePrecision == .Float16 { + let xvalue: [UInt16] = .init(repeating: 0, count: rcount) + let pointer: UnsafeMutablePointer = UnsafeMutablePointer(mutating: nvalue) + let outputP: UnsafeMutablePointer = UnsafeMutablePointer(mutating: xvalue) + float32ToFloat16(input: pointer, output: outputP, count: rcount) + let bpR = ndim[2] * 4 * 2 + let bpI = ndim[1] * bpR + for i in 0.. = UnsafeMutablePointer(mutating: nvalue) + let bpR = ndim[2] * 4 * MemoryLayout

.size + let bpI = ndim[1] * bpR + for i in 0..(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{ + + let textureDesc = MTLTextureDescriptor.init() + textureDesc.width = textureWidth + textureDesc.height = textureHeight + textureDesc.depth = 1 + textureDesc.usage = [.shaderRead, .shaderWrite] + textureDesc.pixelFormat = .rgba32Float + textureDesc.textureType = .type2DArray + textureDesc.storageMode = .shared + textureDesc.cpuCacheMode = .defaultCache + textureDesc.arrayLength = arrayLength + let texture = makeTexture(descriptor: textureDesc)! + + if value.count >= 4{ + let counts = arrayLength * 4 * textureWidth * textureHeight + let pointer: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: counts * MemoryLayout

.size) + for i in 0...size + let bytesPerImage = texture.height * bytesPerRow + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth)) + for i in 0..(stridable: Bool = true) -> [(index: Int, value: P)] { - var arr: [P] = floatArray { (p: P) -> P in - return p; + + func stridableFloatArray

(stridable: Bool = true) -> [(index: Int, value: P)] { + var arr: [P] = floatArray { (p: P) -> P in + return p; + } + var result: [(index: Int, value: P)] = [] + if arr.count > 100 && stridable { + for j in stride(from: 0, to: arr.count , by: arr.count / 100){ + result.append((j, arr[j])) + } + } else { + for j in 0..(res: (P) -> T) -> [T] { + var fArr: [T] = [] + if textureType == .type2DArray { + for i in 0...size, alignment: MemoryLayout

.alignment) + let bytesPerRow = width * depth * 4 * MemoryLayout

.size + let bytesPerImage = width * height * depth * 4 * MemoryLayout

.size + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) + getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) + let p = bytes.assumingMemoryBound(to: P.self) + + for j in 0.. 100 && stridable { - for j in stride(from: 0, to: arr.count , by: arr.count / 100){ - result.append((j, arr[j])) - } + bytes.deallocate() + } + } else if textureType == .type2D { + let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout

.size, alignment: MemoryLayout

.alignment) + let bytesPerRow = width * depth * 4 * MemoryLayout

.size + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) + getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) + let p = bytes.assumingMemoryBound(to: P.self) + + for j in 0.. [Float32] { + if pixelFormat == .rgba32Float { + let float32Array = floatArray { (f: Float32) -> Float32 in + return f + } + return float32Array + } else if pixelFormat == .rgba16Float { + + var float16Array = floatArray { (f: Float16) -> Float16 in + return f + } + return float16To32(input: &float16Array, count: float16Array.count) + } else { + fatalError() + } + } + + func logDesc(header: String = "", stridable: Bool = true) -> T? { + print(header) + print("texture: \(self)") + // let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable) + // print(res) + + if textureType == .type2DArray { + for i in 0...size, alignment: MemoryLayout.alignment) + let bytesPerRow = width * depth * 4 * MemoryLayout.size + let bytesPerImage = width * height * depth * 4 * MemoryLayout.size + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) + getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) + let p = bytes.assumingMemoryBound(to: T.self) + str += "2d array count : \(width * height * depth * 4) \n" + if stridable && width * height * depth * 4 > 20 { + for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 20){ + str += " index \(j): \(p[j])" + } } else { - for j in 0...size, alignment: MemoryLayout.alignment) + let bytesPerRow = width * depth * 4 * MemoryLayout.size + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) + getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) + let p = 
bytes.assumingMemoryBound(to: T.self) + str += "2d count : \(width * width * 4) \n" + + if stridable { + for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 20){ + str += "index \(j): \(p[j]) " + } + } else { + for j in 0.. [Float32] { + var textureArray: [Float32] + if pixelFormat == .rgba32Float { + textureArray = floatArray { (i : Float32) -> Float32 in + return i + } + } else if pixelFormat == .rgba16Float { + + var textureFloat16Array = floatArray { (i : Float16) -> Float16 in + return i + } + textureArray = float16To32(input: &textureFloat16Array, count: textureFloat16Array.count) + } else { + fatalError(" 目前还不支持其他类型 ") + } + + var output: [Float32] = [] + for s in 0.. [Float32] { +// print("origin dim: \(dim)") +// print("texture: ") +// print(self) + + var textureArray: [Float32] + if pixelFormat == .rgba32Float { + textureArray = floatArray { (i : Float32) -> Float32 in + return i + } + } else if pixelFormat == .rgba16Float { + var textureFloat16Array = floatArray { (i : Float16) -> Float16 in + return i + } + textureArray = float16To32(input: &textureFloat16Array, count: textureFloat16Array.count) + } else { + fatalError(" 目前还不支持其他类型 ") } - func floatArray(res: (P) -> T) -> [T] { - var fArr: [T] = [] - if textureType == .type2DArray { - for i in 0...size, alignment: MemoryLayout

.alignment) - let bytesPerRow = width * depth * 4 * MemoryLayout

.size - let bytesPerImage = width * height * depth * 4 * MemoryLayout

.size - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) - getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) - let p = bytes.assumingMemoryBound(to: P.self) - - for j in 0.. dim.c { + for i in 0..<(4 - ((sliceIndex * 4 + 4) - dim.c)) { + let value = textureArray[sliceIndex * numOfASlice + h * dim.w * 4 + w * 4 + i] + output.append(value) } - } else if textureType == .type2D { - let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout

.size, alignment: MemoryLayout

.alignment) - let bytesPerRow = width * depth * 4 * MemoryLayout

.size - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) - getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) - let p = bytes.assumingMemoryBound(to: P.self) - - for j in 0..(header: String = "", stridable: Bool = true) -> T? { - print(header) - print("texture: \(self)") - let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable) - print(res) + return output + } -// if textureType == .type2DArray { -// for i in 0...size, alignment: MemoryLayout.alignment) -// let bytesPerRow = width * depth * 4 * MemoryLayout.size -// let bytesPerImage = width * height * depth * 4 * MemoryLayout.size -// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) -// getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) -// let p = bytes.assumingMemoryBound(to: T.self) -// str += "2d array count : \(width * height * depth * 4) \n" -// if stridable && width * height * depth * 4 > 100 { -// for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){ -// str += " index \(j): \(p[j])" -// } -// } else { -// for j in 0...size, alignment: MemoryLayout.alignment) -// let bytesPerRow = width * depth * 4 * MemoryLayout.size -// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) -// getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) -// let p = bytes.assumingMemoryBound(to: T.self) -// str += "2d count : \(width * width * 4) \n" -// -// if stridable { -// for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){ -// str += "index \(j): \(p[j]) " -// } -// } else { -// for j in 0..(header: String = "", stridable: Bool = true) -> T? 
{ - print(header) - print("MTLBuffer: \(self) ") - var str = "" - if stridable && length/MemoryLayout.stride > 1000{ - for j in stride(from: 0, to: length, by: length/MemoryLayout.stride / 100){ - str += " \(contents().assumingMemoryBound(to: T.self)[j])" - } - } else { - for i in 0...size { - str += " \(contents().assumingMemoryBound(to: T.self)[i])" - } - } - print(str) - return nil + func logDesc(header: String = "", stridable: Bool = true) -> T? { + print(header) + print("MTLBuffer: \(self) ") + var str = "" + if stridable && length/MemoryLayout.stride > 1000{ + for j in stride(from: 0, to: length, by: length/MemoryLayout.stride / 100){ + str += " \(contents().assumingMemoryBound(to: T.self)[j])" + } + } else { + for i in 0...size { + str += " \(contents().assumingMemoryBound(to: T.self)[i])" + } } - - func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture { - let textureDesc = MTLTextureDescriptor.init() - textureDesc.width = textureWidth - textureDesc.height = textureHeight - textureDesc.depth = 1 - textureDesc.usage = [.shaderRead, .shaderWrite] - textureDesc.pixelFormat = .rgba32Float - textureDesc.textureType = .type2DArray - textureDesc.storageMode = .shared - textureDesc.cpuCacheMode = .defaultCache - textureDesc.arrayLength = arrayLength - let texture = makeTexture(descriptor: textureDesc, offset: 0, bytesPerRow: textureWidth * 4 * 4)! 
- return texture + print(str) + return nil + } + + func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture { + let textureDesc = MTLTextureDescriptor.init() + textureDesc.width = textureWidth + textureDesc.height = textureHeight + textureDesc.depth = 1 + textureDesc.usage = [.shaderRead, .shaderWrite] + textureDesc.pixelFormat = .rgba32Float + textureDesc.textureType = .type2DArray + textureDesc.storageMode = .shared + textureDesc.cpuCacheMode = .defaultCache + textureDesc.arrayLength = arrayLength + let texture = makeTexture(descriptor: textureDesc, offset: 0, bytesPerRow: textureWidth * 4 * 4)! + return texture + } + + func array() -> [T] { + var array: [T] = [] + let pointer = contents().bindMemory(to: T.self, capacity: length) + for i in 0..<(length / MemoryLayout.size) { + array.append(pointer[i]) } - - - + return array; + } } - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift index a2927c4693c35fd8181d891cc33fa27c2c4cf0b9..91afae6f6415d187a69063381f3a27a6bbe92b81 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift @@ -1,10 +1,16 @@ -// -// TestConvAddBatchNormRelu.swift -// paddle-mobile-demo -// -// Created by liuRuiLong on 2018/7/25. -// Copyright © 2018年 orange. All rights reserved. -// +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ import Metal import Foundation @@ -17,6 +23,204 @@ public class PaddleMobileUnitTest { queue = inQueue } + private func indentPrintTensor(tensor: [Float32], dim: [Int], ix: [Int], indentLevel: Int) { + let indent = Array.init(repeating: " ", count: indentLevel).joined(separator: "") + var tx = ix + if dim.count == indentLevel + 1 { + var log: String = indent + "[" + for i in 0.. 0 { + log += ", " + } + log += tensor[c].description + } + log += "]" + if (indentLevel > 0) && (ix[indentLevel - 1] < dim[indentLevel - 1] - 1) { + log += "," + } + print(log) + } else { + print(indent + "[") + for i in 0.. 0) && (ix[indentLevel - 1] < dim[indentLevel - 1] - 1) { + print(indent + "],") + } else { + print(indent + "]") + } + } + } + + private func tensorPrint(tensor: [Float32], dim: [Int]) { + var detectPos = -1 + var odim = 1 + var ndim = dim + for i in 0..= -1) + if (detectPos == -1) { + assert(tensor.count == odim) + } else { + assert(tensor.count % odim == 0) + ndim[detectPos] = tensor.count / odim + } + indentPrintTensor(tensor: tensor, dim: ndim, ix: dim.map { $0 * 0 }, indentLevel: 0) + } + + public func testConcat() { +// let buffer = queue.makeCommandBuffer() ?! 
"buffer is nil" +// var it: [[Float32]] = [] +// for _ in 0..<7 { +// it.append((0..<12).map { Float32($0) }) +// } +// let input = it.map { device.tensor2texture(value: $0, dim: [3, 4]) } +// let output = device.tensor2texture(value: [Float32](), dim: [3, 28]) +// +// let param = ConcatTestParam.init( +// input: input, +// output: output, +// dims: [[3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4], [3, 4]], +// axis: 1, +// odim: [3, 28] +// ) +// let concatKernel = ConcatKernel.init(device: device, testParam: param) +// concatKernel.test(cmdBuffer: buffer, param: param) +// buffer.addCompletedHandler { (buffer) in +// for i in 0...init(device: device, testParam: param) +// reshapeKernel.test(commandBuffer: buffer, testParam: param) +// buffer.addCompletedHandler { (buffer) in +// let _: Float32? = inTexture.logDesc() +// let _: Float32? = outTexture.logDesc() +// self.tensorPrint(tensor: input, dim: [2, 3, 4]) +// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [4, 6]) +// self.tensorPrint(tensor: tx, dim: [4, 6]) +// } + +// let input: [Float32] = (0..<24).map { Float32($0) } +// let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4]) +// let outTexture = device.tensor2texture(value: [Float32](), dim: [24]) +// let mp = ReshapeMetalParam.init( +// idim: (1, 2, 3, 4), +// itrans: (0, 1, 2, 3), +// odim: (1, 1, 1, 24), +// otrans: (0, 1, 2, 3) +// ) +// let param = ReshapeTestParam.init( +// inputTexture: inTexture, +// outputTexture: outTexture, +// param: mp +// ) +// let reshapeKernel = ReshapeKernel.init(device: device, testParam: param) +// reshapeKernel.test(commandBuffer: buffer, testParam: param) +// buffer.addCompletedHandler { (buffer) in +// let _: Float32? = inTexture.logDesc() +// let _: Float32? 
= outTexture.logDesc() +// self.tensorPrint(tensor: input, dim: [2, 3, 4]) +// let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [24]) +// self.tensorPrint(tensor: tx, dim: [24]) +// } +// +// +// buffer.commit() + } + + public func testTranspose() { + + let buffer = queue.makeCommandBuffer() ?! "buffer is nil" +// var input: [Float32] = [] +// for i in 0..<72 { +// input.append(Float32(i)) +// } +//// let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3) +// let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]); +// // group 1 +// let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4]) +// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0]) +//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1]) +//// // group 2 +//// let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6) +//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1]) +//// +// let transposeKernel = TransposeKernel.init(device: device, testParam: param) +// +// transposeKernel.test(commandBuffer: buffer, param: param) +// +// buffer.addCompletedHandler { (buffer) in +// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false) +// let _: Float32? 
= outputTexture.logDesc(header: "output texture", stridable: false) +// self.tensorPrint(tensor: input, dim: [4, 3, 2, 3]) +// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4]) +// self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4]) +// } +// +// let input: [Float32] = (0..<24).map { Float32($0) } +// let inputTexture = device.tensor2texture(value: input, dim: [2, 3, 4]) +// let outputTexture = device.tensor2texture(value: [Float](), dim: [3, 4, 2]) +// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [0, 2, 3, 1]) +// let transposeKernel = TransposeKernel.init(device: device, testParam: param) +// +// transposeKernel.test(commandBuffer: buffer, param: param) +// +// buffer.addCompletedHandler { (buffer) in +// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false) +// let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false) +// self.tensorPrint(tensor: input, dim: [2, 3, 4]) +// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 4, 2]) +// self.tensorPrint(tensor: tx, dim: [3, 4, 2]) +// } +// + buffer.commit() + } + public func testConvAddBnRelu() { let buffer = queue.makeCommandBuffer() ?! 
" buffer is nil " @@ -116,7 +320,7 @@ public class PaddleMobileUnitTest { let offsetX = filterSize.width/2 - paddings.0 let offsetY = filterSize.height/2 - paddings.1 - let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0)) + let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), dilationX: UInt16(1), dilationY: UInt16(1)) let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize) @@ -132,16 +336,6 @@ public class PaddleMobileUnitTest { } buffer.commit() - - -// let inputTexture = device.makeFloatTexture(value: <#T##[P]#>, textureWidth: <#T##Int#>, textureHeight: <#T##Int#>, arrayLength: <#T##Int#>) - - -// let param = ConvAddBatchNormReluTestParam.init(inInputTexture: <#T##MTLTexture#>, inOutputTexture: <#T##MTLTexture#>, inMetalParam: <#T##MetalConvParam#>, inFilterBuffer: <#T##MTLBuffer#>, inBiaseBuffer: <#T##MTLBuffer#>, inNewScaleBuffer: <#T##MTLBuffer#>, inNewBiaseBuffer: <#T##MTLBuffer#>, inFilterSize: <#T##(width: Int, height: Int, channel: Int)#>) - -// ConvAddBatchNormReluKernel.init(device: <#T##MTLDevice#>, testParam: <#T##ConvAddBatchNormReluTestParam#>) - - } } diff --git a/metal/paddle-mobile/paddle-mobile/Common/Tools.swift b/metal/paddle-mobile/paddle-mobile/Common/Tools.swift index 930198fbf9c2cbfd917ddcb9ecb1fe02767c21f9..23ad7113971de3d0843abe17accfe3d67f0caaa9 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/Tools.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/Tools.swift @@ -1,10 +1,16 @@ -// -// Tools.swift -// paddle-mobile -// -// Created by liuRuiLong on 2018/7/26. 
-// Copyright © 2018年 orange. All rights reserved. -// +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ import Foundation diff --git a/metal/paddle-mobile/paddle-mobile/Common/Types.swift b/metal/paddle-mobile/paddle-mobile/Common/Types.swift index 98353617f5090f1eeac0c644c17548555638a6ca..a1197ed2188a263af3c0819fec09b584af501dd3 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/Types.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/Types.swift @@ -13,80 +13,228 @@ limitations under the License. */ import Foundation +import Accelerate public protocol SummableMultipliable: Equatable { - static func +(lhs: Self, rhs: Self) -> Self - static func *(lhs: Self, rhs: Self) -> Self - static func -(lhs: Self, rhs: Self) -> Self + static func +(lhs: Self, rhs: Self) -> Self + static func *(lhs: Self, rhs: Self) -> Self + static func -(lhs: Self, rhs: Self) -> Self } public protocol PrecisionType: SummableMultipliable{ - init(inFloat: Float32) - init(inFloat16: Float16) - init(_ inP: P) - static var bitSize: UInt { get } + init(inFloat: Float32) + init(inFloat16: Float16) + init(_ inP: P) + static var bitSize: UInt { get } } public typealias Float16 = Int16 extension Float16: PrecisionType { - public static func * (prefix: Float16, postfix: Float16) { - return prefix * postfix + public static func * (prefix: Float16, postfix: Float16) { + return prefix * postfix + } + + public init

(_ inP: P) where P : PrecisionType { + if P.bitSize == Float32.bitSize { + self = Float16(inFloat: inP as! Float32) + } else if P.bitSize == Float16.bitSize { + self = inP as! Float16 + } else { + fatalError() } - - public init

(_ inP: P) where P : PrecisionType { - if P.bitSize == Float32.bitSize { - self = Float16(inFloat: inP as! Float32) - } else if P.bitSize == Float16.bitSize { - self = inP as! Float16 - } else { - fatalError() + } + + public static var bitSize: UInt { + return 16 + } + + public init(inFloat16: Float16) { + self = inFloat16 + } + public init(inFloat: Float32) { + self = Int16(inFloat) + } +} + +extension Float32: PrecisionType { + public init

(_ inP: P) where P : PrecisionType { + if P.bitSize == Float32.bitSize { + self = inP as! Float32 + } else if P.bitSize == Float16.bitSize { + self = Float32.init(inP as! Float16) + } else { + fatalError() + } + } + + public init(inFloat: Float32) { + self = inFloat + } + + public init(inFloat16: Float16) { + self = Float32.init(inFloat16) + } + + public static var bitSize: UInt { + return 32 + } +} + +public func float32ToFloat16(input: UnsafeMutablePointer, output: UnsafeMutableRawPointer, count: Int) { + var float32Buffer = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 4) + var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2) + guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else { + fatalError(" float 32 to float 16 error ! ") + } +} + +public func float16To32(input: UnsafeMutablePointer, count: Int) -> [Float32] { + var output = Array.init(repeating: 0.0, count: count) + float16to32(input: input, output: &output, count: count) + return output +} + +public func float16to32(input: UnsafeMutablePointer, output: UnsafeMutablePointer, count: Int) { + var bufferFloat16 = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 2) + var bufferFloat32 = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 4) + if vImageConvert_Planar16FtoPlanarF(&bufferFloat16, &bufferFloat32, 0) != kvImageNoError { + fatalError(" convert float16 to float32 error") + } +} + +// N - 0 C - 1 H - 2 W - 3 +struct DataLayout { + + static func NCHW(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout { + return DataLayout.init([(.N, dim[0]), (.C, dim[1]), (.H, dim[2]), (.W, dim[3])]) + } + + static func NHWC(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout { + return DataLayout.init([(.N, dim[0]), (.H, dim[1]), (.W, dim[2]), (.C, dim[3])]) + } + + func count() -> Int { + return layoutWithDim.count + } + + var N: Int? 
{ + get { + for layoutDim in layoutWithDim { + if layoutDim.0 == .N { + return layoutDim.1 } + } + return nil } - - public static var bitSize: UInt { - return 16 + set { + var newN = (Layout.N, newValue) + if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in + return layout == .N + }) { + fatalError() + } } - - public init(inFloat16: Float16) { - self = inFloat16 + } + var C: Int? { + get { + for layoutDim in layoutWithDim { + if layoutDim.0 == .C { + return layoutDim.1 + } + } + return nil } - public init(inFloat: Float32) { - self = Int16(inFloat) + set { + var newN = (Layout.C, newValue) + if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in + return layout == .N + }) { + fatalError() + } } - - - -} - -extension Float32: PrecisionType { - public init

(_ inP: P) where P : PrecisionType { - if P.bitSize == Float32.bitSize { - self = inP as! Float32 - } else if P.bitSize == Float16.bitSize { - self = Float32.init(inP as! Float16) - } else { - fatalError() + } + var H: Int? { + get { + for layoutDim in layoutWithDim { + if layoutDim.0 == .H { + return layoutDim.1 } + } + return nil } - - public init(inFloat: Float32) { - self = inFloat + set { + var newN = (Layout.H, newValue) + if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in + return layout == .H + }) { + fatalError() + } } - - public init(inFloat16: Float16) { - self = Float32.init(inFloat16) + } + var W: Int? { + get { + for layoutDim in layoutWithDim { + if layoutDim.0 == .W { + return layoutDim.1 + } + } + return nil + } + set { + var newN = (Layout.W, newValue) + if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in + return layout == .W + }) { + fatalError() + } } + } + + + init(_ inLayout: [(Layout, Int)]) { + layoutWithDim = inLayout + } + + func layout() -> [Layout] { + return layoutWithDim.map({ (layout: Layout, dim: Int) -> Layout in + return layout + }) + } + + var layoutWithDim: [(Layout, Int)] = [(.N, 0), (.C, 0), (.H, 0), (.W, 0)] + + func convertTo(inLayout: [Layout]) { - public static var bitSize: UInt { - return 32 + } + + enum Layout: Int{ + case N = 0 + case C = 1 + case H = 2 + case W = 3 + static func defaultLayout() -> [Layout] { + return [N, C, H, W] } + } } -public enum DataLayout { - case NCHW - case NHWC +extension DataLayout: Equatable { + public static func == (lhs: DataLayout, rhs: DataLayout) -> Bool { + if lhs.layoutWithDim.count == rhs.layoutWithDim.count { + var result = true + for i in 0.. 
{ + guard let inResultBuffer = resultBuffer else { + fatalError() + } + return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: capacity) + } + +} + +extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible { + var description: String { + fatalError() +// return "\(result)" + } + + var debugDescription: String { + fatalError() +// return "\(result)" + } + + +} + + + diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift deleted file mode 100644 index 0dcb3151e21cc0f3968a07da39366d4ba5fd5813..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Executor.swift +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation - -public class ResultHolder { - public let dim: [Int] - public let resultArr: [P] - public let elapsedTime: Double - public init(inDim: [Int], inResult: [P], inElapsedTime: Double) { - dim = inDim - resultArr = inResult - elapsedTime = inElapsedTime - } -} - -extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible { - public var debugDescription: String { - var str = "" - str += "Dim: \(dim) \n value:[ " - if resultArr.count < 20 { - for d in resultArr { - str += " \(d) " - } - } else { - for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) { - str += " \(resultArr[d]) " - } - } - str += " ]" - return str - } - - public var description: String { - return debugDescription - } -} - -public class Executor { - var ops: [Runable & InferShaperable] = [] - let program: Program - let device: MTLDevice - let queue: MTLCommandQueue - public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws { - program = inProgram - device = inDevice - queue = inQueue - for block in inProgram.programDesc.blocks { - //block.ops.count - for i in 0...shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) - op.inferShape() - ops.append(op) - } catch let error { - throw error - } - } - -// for op in block.ops { -// do { -// let op = try OpCreator

.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) -// op.inferShape() -// ops.append(op) -// } catch let error { -// throw error -// } -// } - } - } - - public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder

) -> Void, preProcessKernle: CusomKernel? = nil) throws { - guard let buffer = queue.makeCommandBuffer() else { - throw PaddleMobileError.predictError(message: "CommandBuffer is nil") - } - let resInput: MTLTexture - if let inPre = preProcessKernle { - do { - try inPre.compute(inputTexuture: input, commandBuffer: buffer) - resInput = inPre.outputTexture - } catch let error { - throw error - } - } else { - resInput = input - } - - let beforeDate = Date.init() - let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: expect)) - program.scope.setInput(input: inputTexture) - - for op in ops { - do { - try op.run(device: device, buffer: buffer) - } catch let error { - throw error - } - } - - buffer.addCompletedHandler { (commandbuffer) in -// let inputArr = resInput.floatArray(res: { (p:P) -> P in -// return p -// }) -// print(inputArr) - -// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray() -// print(stridableInput) - -// let _: Flo? = input.logDesc(header: "input: ", stridable: true) -// for op in self.ops { -// op.delogOutput() -// } -// return - -// self.ops[2].delogOutput() - - - let afterDate = Date.init() - - guard let outputVar = self.program.scope.output() else { - fatalError("output nil") - } - - guard let output = outputVar as? Texture

else { - fatalError("output var type error") - } - let resultHodlder = ResultHolder

.init(inDim: output.dim.dims, inResult: output.metalTexture.floatArray(res: { (p:P) -> P in - return p - }), inElapsedTime: afterDate.timeIntervalSince(beforeDate)) - completionHandle(resultHodlder) - } - buffer.commit() - } - - public func clear() { - program.scope.clear() - } - -} - -//public let paddle_executor: Executor = Executor.init() diff --git a/metal/paddle-mobile/paddle-mobile/Genet.swift b/metal/paddle-mobile/paddle-mobile/Genet.swift new file mode 100644 index 0000000000000000000000000000000000000000..d803d1e99537e3a24d1fae5a5653d680bd811ac2 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Genet.swift @@ -0,0 +1,54 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +public class Genet: Net { + @objc public override init(device: MTLDevice) { + super.init(device: device) + means = [128.0, 128.0, 128.0] + scale = 0.017 + except = 0 + modelPath = Bundle.main.path(forResource: "genet_model", ofType: nil) ?! "model null" + paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! 
"para null" + modelDir = "" + preprocessKernel = GenetPreProccess.init(device: device) + dim = (n: 1, h: 128, w: 128, c: 3) + } + + @objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { + super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) + means = [128.0, 128.0, 128.0] + scale = 0.017 + except = 0 + modelPath = "" + paramPath = "" + modelDir = "" + preprocessKernel = GenetPreProccess.init(device: device) + dim = (n: 1, h: 128, w: 128, c: 3) + } + + class GenetPreProccess: CusomKernel { + init(device: MTLDevice) { + let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3) + super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false) + } + } + + override public func resultStr(res: ResultHolder) -> String { +// fatalError() + return " \(res.result![0]) ... " + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/Loader.swift b/metal/paddle-mobile/paddle-mobile/Loader.swift deleted file mode 100644 index c68b68e1caffcadc2adb2b4ddf245c89b2c5a223..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Loader.swift +++ /dev/null @@ -1,187 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Foundation -import SwiftProtobuf - -public class Loader { - class ParaLoader { - let file: UnsafeMutablePointer - let fileSize: Int - var nowIndex: Int - init(paramPath: String) throws { - guard let tmpFile = fopen(paramPath, "rb") else { - throw PaddleMobileError.loaderError(message: "open param file error" + paramPath) - } - file = tmpFile - fseek(file, 0, SEEK_END) - fileSize = ftell(file) - guard fileSize > 0 else { - throw PaddleMobileError.loaderError(message: "param file size is too small") - } - rewind(file) - nowIndex = 0 - } - - func read(tensor: Tensor

) throws { - guard nowIndex <= fileSize else { - throw PaddleMobileError.loaderError(message: "out of the file range") - } - - func pointerReader(type: T.Type) -> T { - let ptr = UnsafeMutablePointer.allocate(capacity: MemoryLayout.size) - fread(ptr, 1, MemoryLayout.size, file) - nowIndex += MemoryLayout.size - let pointee = ptr.pointee - ptr.deinitialize(count: MemoryLayout.size) - ptr.deallocate() - return pointee - } - - let _ = pointerReader(type: UInt32.self) - let lodLevel = pointerReader(type: UInt64.self) - for _ in 0...size)){ - _ = pointerReader(type: size_t.self) - } - } - - let _ = pointerReader(type: UInt32.self) - - let tensorDescSize = pointerReader(type: Int32.self) - - fseek(file, Int(tensorDescSize), SEEK_CUR) - nowIndex += Int(tensorDescSize) - - /* - 这里没有根据 Data Type 去判断, 而是从外部泛型直接指定了精度 - */ - - //现在模型传入模型为 Float 类型, 这块应该根据模型来 -// let tmpCapacity = MemoryLayout.size * tensor.numel() -// let tmpPointer = UnsafeMutablePointer.allocate(capacity: tmpCapacity); - let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) - - guard bytesRead == tensor.data.size else { - throw PaddleMobileError.loaderError(message: "param read size error") - } - - // TODO: use script to convert -// let bytesRead = fread(tmpPointer, 1, tmpCapacity, file) -// for i in 0.. Program{ - guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else { - throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !") - } - - do { - let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init( - serializedData: modelData) - - let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram) - let programDesc = ProgramOptimize

.init().optimize(originProgramDesc: originProgramDesc) - print(programDesc) - - guard let paraLoader = try? ParaLoader.init(paramPath: paraPath) else { - throw PaddleMobileError.loaderError(message: "load para error") - } - - guard programDesc.blocks.count > 0 else { - throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0") - } - - // to get feed key and fetch key - let block = programDesc.blocks[0] - guard let firstOp = block.ops.first, let lastOp = block.ops.last else { - throw PaddleMobileError.loaderError(message: "at least two operator") - } - guard firstOp.type == gFeedType, lastOp.type == gFetchType else { - throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch") - } - - guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else { - throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found") - } - guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else { - throw PaddleMobileError.loaderError(message: "feed key or fetch key not found") - } - - let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey) - - // to load memory - for block in programDesc.blocks { - for varDesc in block.vars { - if (varDesc.type == .LodTensor) { - guard let tensorDesc = varDesc.tensorDesc else { - throw PaddleMobileError.loaderError(message: "get tensor desc failed") - } - -// guard (try? tensorDesc.dataType.dataTypeSize()) == MemoryLayout

.size else { -// throw PaddleMobileError.memoryError(message: "PrecisionType not support") -// } - - if (varDesc.persistable - && varDesc.type != .FeedMiniBatch - && varDesc.type != .FetchList) { - let dimArr = tensorDesc.dims - - guard dimArr.count > 0 else { - throw PaddleMobileError.loaderError(message: "tensor desc dim size error") - } - - let dim = Dim.init(inDim: dimArr) - let tensor = Tensor

.init(inDim: dim, inLayout: tensorDesc.dataLayout) - do { - try paraLoader.read(tensor: tensor) - } catch let error { - throw error - } - tensor.convert(to: .NHWC) -// tensor.initBuffer(device: device) - scope[varDesc.name] = tensor - } else { - let dim = Dim.init(inDim: tensorDesc.NHWCDim) - scope[varDesc.name] = Texture

.init(device: device, inDim: dim) - } - } else { - if varDesc.name == fetchKey { - scope[varDesc.name] = ResultHolder

.init(inDim: [], inResult: [], inElapsedTime: 0.0) - } else if varDesc.name == feedKey { - } - } - } - } - - let program = Program.init(inProgramDesc: programDesc, inParamPath: paraPath, inScope: scope) - - return program - } catch _ { - throw PaddleMobileError.loaderError(message: "protobuf decoder error") - } - } -} diff --git a/metal/paddle-mobile/paddle-mobile/MobileNet.swift b/metal/paddle-mobile/paddle-mobile/MobileNet.swift new file mode 100644 index 0000000000000000000000000000000000000000..7d10a920d15e751f29fce7f9f6be71cd6a2d6b69 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/MobileNet.swift @@ -0,0 +1,70 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class MobileNet: Net{ + + class MobilenetPreProccess: CusomKernel { + init(device: MTLDevice) { + let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) + super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, usePaddleMobileLib: false) + } + } + + class PreWords { + var contents: [String] = [] + init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) { + if let filePath = inBundle.path(forResource: fileName, ofType: type) { + let string = try! 
String.init(contentsOfFile: filePath) + contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{ + String($0[$0.index($0.startIndex, offsetBy: 10)...]) + } + }else{ + fatalError("no file call \(fileName)") + } + } + subscript(index: Int) -> String { + return contents[index] + } + } + + let labels = PreWords.init(fileName: "synset") + + override public func resultStr(res: ResultHolder) -> String { + guard let resPointer = res.result else { + fatalError() + } + var s: [String] = [] + (0.. String { + return " \(res)" + } + + override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { + +// guard let interRes = paddleMobileRes.intermediateResults else { +// fatalError(" need have inter result ") +// } +// +// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture else { +// fatalError(" need score ") +// } +// +// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture else { +// fatalError() +// } +// +// var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3])) +//// print("score: ") +//// print(scoreFormatArr.strideArray()) +//// +// var bboxArr = bbox.metalTexture.float32Array() +//// print("bbox: ") +//// print(bboxArr.strideArray()) +// +// let nmsCompute = NMSCompute.init() +// nmsCompute.scoreThredshold = 0.01 +// nmsCompute.nmsTopK = 400 +// nmsCompute.keepTopK = 200 +// nmsCompute.nmsEta = 1.0 +// nmsCompute.nmsThreshold = 0.45 +// nmsCompute.background_label = 0; +// +// nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])] +// +// nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])] +// guard let result = nmsCompute.compute(withScore: 
&scoreFormatArr, andBBoxs: &bboxArr) else { +// fatalError( " result error " ) +// } +// +// let output: [Float32] = result.map { $0.floatValue } +// +// +// return output + fatalError() + } + + + + +} diff --git a/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift b/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift new file mode 100644 index 0000000000000000000000000000000000000000..6c7bd9b9c6ae4f55327a370ceb1e682a8e5e7658 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift @@ -0,0 +1,153 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +public class MobileNet_ssd_AR: Net{ + @objc public override init(device: MTLDevice) { + super.init(device: device) + means = [103.94, 116.78, 123.68] + scale = 1 + except = 2 + modelPath = Bundle.main.path(forResource: "ar_model", ofType: nil) ?! "model null" + paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! 
"para null" + modelDir = "" + preprocessKernel = MobilenetssdPreProccess.init(device: device) + dim = (n: 1, h: 160, w: 160, c: 3) + } + + @objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { + super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) + means = [103.94, 116.78, 123.68] + scale = 1 + except = 2 + modelPath = "" + paramPath = "" + modelDir = "" + preprocessKernel = MobilenetssdPreProccess.init(device: device) + dim = (n: 1, h: 160, w: 160, c: 3) + } + + class MobilenetssdPreProccess: CusomKernel { + init(device: MTLDevice) { + let s = CusomKernel.Shape.init(inWidth: 160, inHeight: 160, inChannel: 3) + super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, usePaddleMobileLib: false) + } + } + + override public func resultStr(res: ResultHolder) -> String { + return " \(res.result![0])" + } + + override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { + guard let interRes = paddleMobileRes.intermediateResults else { + fatalError(" need have inter result ") + } + + guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else { + fatalError(" need score ") + } + + guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else { + fatalError() + } + +// let startDate = Date.init() + +// print("scoreFormatArr: ") +//print((0.. 
+ originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7]) + + originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7]) + + originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7]) + + program.scope[output] = originTexture + + if i == 99 { + opDesc.attrs["axis"] = 0 + } else { + opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } + } + } + + for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] { + let opDesc = program.programDesc.blocks[0].ops[i] + let output = opDesc.outputs["Out"]!.first! + let v = program.scope[output]! + + + + let originTexture = v as! Texture + originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) + opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } + } + + for i in [60, 101, 90, 97, 70, 80] { + let opDesc = program.programDesc.blocks[0].ops[i] + let output = opDesc.outputs["Out"]!.first! + let v = program.scope[output]! + let originTexture = v as! Texture + originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) + opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 + } + + for i in [102] { + let opDesc = program.programDesc.blocks[0].ops[i] + for output in opDesc.outputs["Out"]! { + let v = program.scope[output]! + let originTexture = v as! Texture + originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) + } + opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! 
Int) - 1 + print(" split axis \(opDesc.attrs["axis"])") + } + // 99 + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/Net.swift b/metal/paddle-mobile/paddle-mobile/Net.swift new file mode 100644 index 0000000000000000000000000000000000000000..ce9ec98a66e685eec3a688a5a29402a76567b0e2 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Net.swift @@ -0,0 +1,70 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + + +import Foundation + +public class ResultHolder: NSObject { + @objc public let result: UnsafeMutablePointer? + @objc public let capacity: Int + + init(inResult: UnsafeMutablePointer?, inCapacity: Int) { + result = inResult + capacity = inCapacity + } + + @objc public func releasePointer() { + result?.deinitialize(count: capacity) + result?.deallocate() + } +} + +public class Net: NSObject { + var except: Int = 0 + var means: [Float] = [] + var scale: Float = 0.0 + var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0) + var preprocessKernel: CusomKernel? = nil + var paramPointer: UnsafeMutableRawPointer? = nil + var paramSize: Int = 0 + var modelPointer: UnsafeMutableRawPointer? 
= nil + var modelSize: Int = 0 + var modelPath: String = "" + var paramPath: String = "" + var modelDir: String = "" + @objc public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { + self.paramPointer = paramPointer + self.paramSize = paramSize + self.modelPointer = modePointer + self.modelSize = modelSize + super.init() + } + + + public func resultStr(res: ResultHolder) -> String { + fatalError() + } + + func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { + return ResultHolder.init(inResult: paddleMobileRes.resultPointer, inCapacity: paddleMobileRes.capacity) + } + + @objc public init(device: MTLDevice) { + super.init() + } + + func updateProgram(program: Program) { + + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift index 0ba02af1c51ba218982cc116e2cf8500cfa14db0..9806042e9eb339d6d15f2cbfebe924b548d29922 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift @@ -43,14 +43,31 @@ class OpCreator { [gConvType : ConvOp

.creat, gBatchNormType : BatchNormOp

.creat, gReluType : ReluOp

.creat, - gElementwiseAdd : ElementwiseAddOp

.creat, + gElementwiseAddType : ElementwiseAddOp

.creat, gFeedType : FeedOp

.creat, gFetchType : FetchOp

.creat, gConvAddBatchNormReluType : ConvAddBatchNormReluOp

.creat, gPooType : PoolOp

.creat, gSoftmaxType : SoftmaxOp

.creat, gReshapeType : ReshapeOp

.creat, - gConvAddType : ConvAddOp

.creat] - + gConvAddType : ConvAddOp

.creat, + gDepthConvType : DepthConvOp

.creat, + gConcatType : ConcatOp

.creat, + gBoxcoderType : BoxcoderOp

.creat, + gConvBnReluType : ConvBNReluOp

.creat, + gDwConvBnReluType : DwConvBNReluOp

.creat, + gMulticlassNMSType : MulticlassNMSOp

.creat, + gTransposeType : TransposeOp

.creat, + gPriorBoxType : PriorBoxOp

.creat, + gPreluType : PreluOp

.creat, + gConv2dTransposeType : ConvTransposeOp

.creat, + gBilinearInterpType : BilinearInterpOp

.creat, + gSplit : SplitOp

.creat, + gShape : ShapeOp

.creat, + gFlatten : FlattenOp

.creat, + gConvAddPreluType : ConvAddPreluOp

.creat, + gConvAddAddPreluType : ConvAddAddPreluOp

.creat, + gElementwiseAddPreluType: ElementwiseAddPreluOp

.creat] + private init(){} } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift index 43f095d7008ad14ac71d610728e19ac6f6817800..9f868e35864d59be5711c4ac0a02787638eeae8f 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift @@ -22,147 +22,199 @@ import Foundation */ protocol OpParam { - associatedtype OutputType: Variant - var output: OutputType { get set } - func outputDesc() -> String - - associatedtype ParamPrecisionType: PrecisionType - init(opDesc: OpDesc, inScope: Scope) throws - static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType - static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType - static func input(inputs: [String : [String]], from: Scope) throws -> VarType - static func output(outputs: [String : [String]], from: Scope) throws -> VarType - static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType - static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType - static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType - static func getAttr(key: String, attrs: [String : Attr]) throws -> T + associatedtype OutputType: Variant + var output: OutputType { get set } + func outputDesc() -> String + + associatedtype ParamPrecisionType: PrecisionType + init(opDesc: OpDesc, inScope: Scope) throws + static func getFirstTensor(key: 
String, map: [String : [String]], from: Scope) throws -> VarType + static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType + static func input(inputs: [String : [String]], from: Scope) throws -> VarType + static func output(outputs: [String : [String]], from: Scope) throws -> VarType + static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType + static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType + + static func inputImage(inputs: [String : [String]], from: Scope) throws -> VarType + + static func outputBoxes(outputs: [String : [String]], from: Scope) throws -> VarType + + static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType + + static func outputVariances(outputs: [String : [String]], from: Scope) throws -> VarType + + static func getAttr(key: String, attrs: [String : Attr]) throws -> T + + static func paramInputAlpha(inputs: [String : [String]], from: Scope) throws -> VarType + } extension OpParam { - func outputDesc() -> String { - return output.debugDescription + func outputDesc() -> String { + return output.debugDescription + } + + static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType { + guard let mapKeys = map[key], mapKeys.count > 0 else { + throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty") } - - static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType { - guard let mapKeys = map[key], mapKeys.count > 0 
else { - throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty") - } - guard let variant = from[mapKeys[0]], let v = variant as? VarType else { - throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope") - } - return v + guard let variant = from[mapKeys[0]] else { + throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope") } - static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from) - - return tensorX - } catch let error { - throw error - } + guard let v = variant as? VarType else { + throw PaddleMobileError.paramError(message: " type error") + } - - static func input(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) - return tensorInput - } catch let error { - throw error - } + return v + } + + static func outputVariances(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorVariances: VarType = try getFirstTensor(key: "Variances", map: outputs, from: from) + return tensorVariances + } catch let error { + throw error } - - static func output(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) - return tensorOutput - } catch let error { - throw error - } - } - static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from) - return tensorOutputY - } catch let error { - throw error - } - } - static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from) - return tensorY - } catch let error { - throw error - } + } + + static 
func paramInputAlpha(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let alphaTensor: VarType = try getFirstTensor(key: "Alpha", map: inputs, from: from) + return alphaTensor + } catch let error { + throw error } - - static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) - return out - } catch let error { - throw error - } - } - static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from) - return tensorFilter - } catch let error { - throw error - } + } + + + static func inputImage(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorImage: VarType = try getFirstTensor(key: "Image", map: inputs, from: from) + return tensorImage + } catch let error { + throw error } - - static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) - return tensorBias - } catch let error { - throw error - } + } + + static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from) + return tensorX + } catch let error { + throw error } - - static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from) - return tensorMean - } catch let error { - throw error - } + } + + static func outputBoxes(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorBox: VarType = try getFirstTensor(key: "Boxes", map: outputs, from: from) + return tensorBox + } catch let error { + throw error } - - static func inputScale(inputs: [String : [String]], from: Scope) 
throws -> VarType { - do { - let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) - return tensorScale - } catch let error { - throw error - } + } + + static func input(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) + return tensorInput + } catch let error { + throw error } - - static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) - return tensorVariance - } catch let error { - throw error - } + } + + static func output(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) + return tensorOutput + } catch let error { + throw error + } + } + static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from) + return tensorOutputY + } catch let error { + throw error + } + } + static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from) + return tensorY + } catch let error { + throw error + } + } + + static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) + return out + } catch let error { + throw error + } + } + static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from) + return tensorFilter + } catch let error { + throw error + } + } + + static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let 
tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) + return tensorBias + } catch let error { + throw error + } + } + + static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from) + return tensorMean + } catch let error { + throw error + } + } + + static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) + return tensorScale + } catch let error { + throw error + } + } + + static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) + return tensorVariance + } catch let error { + throw error + } + } + + static func getAttr(key: String, attrs: [String : Attr]) throws -> T{ + guard let attr = attrs[key] else { + throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" ) } - static func getAttr(key: String, attrs: [String : Attr]) throws -> T{ - guard let attr = attrs[key] else { - throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" ) - } - - guard let tAttr = attr as? T else { - throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" ) - } - return tAttr + guard let tAttr = attr as? 
T else { + throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" ) } + return tAttr + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift index bc95f84d8ae98cb8e4e7151f0cf69a574699dc80..40698da5ecb047dbf557cea18556616020ee9750 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift @@ -16,100 +16,118 @@ import Metal import Foundation protocol Fusion { - static func fusionNode() -> Node - static func change() -> [String : [(from: String, to: String)]] - static func fusionType() -> String + static func fusionNode() -> Node + static func change() -> [String : [(from: String, to: String)]] + static func fusionType() -> String + static func needCheck() -> [(Int, String)] +} +extension Fusion { + static func needCheck() -> [(Int, String)] { + return [] + } } protocol Runable { - func run(device: MTLDevice, buffer: MTLCommandBuffer) throws - func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws - func delogOutput() + func run(device: MTLDevice, buffer: MTLCommandBuffer) throws + func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws + func delogOutput() + func inputVariant() -> [String : [Variant]] + func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) } extension Runable where Self: OperatorProtocol{ - func run(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try runImpl(device: device, buffer: buffer) - } catch let error { - throw error - } -// print(type + ": " + para.outputDesc()) + func run(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try runImpl(device: device, buffer: buffer) + } catch let error { + throw error } + } + + func inputVariant() -> [String : [Variant]] { +// return [:] + fatalError(" op \(type) need implement inputVariant") + } + + func computeMiddleResult(device: MTLDevice, 
buffer: MTLCommandBuffer) { + fatalError(" need implement ") + } + + func delogOutput() { - func delogOutput() { - print(type + ": has no implementation" ) - } + print(type + ": has no implementation" ) + } } protocol Creator where Self: OperatorProtocol{ - associatedtype OpType: OperatorProtocol & Runable & InferShaperable - static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType + associatedtype OpType: OperatorProtocol & Runable & InferShaperable + static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType } extension Creator where Self: OperatorProtocol { - static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType { - do { - return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope) - } catch let error { - throw error - } + static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType { + do { + return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope) + } catch let error { + throw error } + } } protocol InferShaperable { - func inferShape() + func inferShape() } protocol OperatorProtocol { - associatedtype ParamType - associatedtype KerType: Computable where Self.KerType.ParamType == ParamType - var type: String { get } - var scope: Scope { get } - var inputs: [String : [String]] { get } - var paraInputs: [String : [String]] { get set } - var outpus: [String : [String]] { get } - var attrs: [String : Attr] { get } - var para: ParamType { get } - var kernel: KerType { get } - init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws + associatedtype ParamType + associatedtype KerType: Computable where Self.KerType.ParamType == ParamType + var type: String { get } + var scope: Scope { get } + var inputs: [String : [String]] { get } + var paraInputs: [String : [String]] { get set } + var outpus: [String : [String]] { get } + var attrs: [String : Attr] { get } + var para: ParamType { get } + var kernel: KerType { get } 
+ init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws } extension OperatorProtocol { - static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self { - do { - return try Self.init(device: device, opDesc: opDesc, inScope: inScope) - } catch let error { - throw error - } + static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self { + do { + return try Self.init(device: device, opDesc: opDesc, inScope: inScope) + } catch let error { + throw error } + } } class Operator : OperatorProtocol where KernelType.ParamType == ParameterType { - typealias ParamType = ParameterType - typealias KerType = KernelType - let type: String - let inputs: [String : [String]] - var paraInputs: [String : [String]] - let outpus: [String : [String]] - let attrs: [String : Attr] - let para: ParamType - let scope: Scope - var kernel: KerType - required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { - type = opDesc.type - scope = inScope - inputs = opDesc.inputs - outpus = opDesc.outputs - attrs = opDesc.attrs - paraInputs = opDesc.paraInputs - do { - para = try ParamType.init(opDesc:opDesc, inScope: inScope) - } catch let error { - throw error - } - kernel = KernelType.init(device: device, param: para) + typealias ParamType = ParameterType + typealias KerType = KernelType + let type: String + let inputs: [String : [String]] + var paraInputs: [String : [String]] + let outpus: [String : [String]] + let attrs: [String : Attr] + let para: ParamType + let scope: Scope + var kernel: KerType + required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { +// print("create op: \(opDesc.type)") + type = opDesc.type + scope = inScope + inputs = opDesc.inputs + outpus = opDesc.outputs + attrs = opDesc.attrs + paraInputs = opDesc.paraInputs + do { + para = try ParamType.init(opDesc:opDesc, inScope: inScope) + } catch let error { + throw error } + kernel = KernelType.init(device: device, param: para) + } } // op 
infos @@ -118,22 +136,57 @@ let gFeedType = "feed" let gConvType = "conv2d" let gBatchNormType = "batch_norm" let gReluType = "relu" -let gElementwiseAdd = "elementwise_add" +let gElementwiseAddType = "elementwise_add" let gConvAddBatchNormReluType = "conv_add_batchnorm_relu" let gPooType = "pool2d" let gSoftmaxType = "softmax" let gReshapeType = "reshape" let gConvAddType = "conv_add" +let gDepthConvType = "depthwise_conv2d" +let gPriorBoxType = "prior_box" +let gTransposeType = "transpose" +let gConcatType = "concat" +let gBoxcoderType = "box_coder" +let gMulticlassNMSType = "multiclass_nms" +let gConvBnReluType = "conv_bn_relu" +let gDwConvBnReluType = "depth_conv_bn_relu" +let gPreluType = "prelu" +let gConv2dTransposeType = "conv2d_transpose" +let gBilinearInterpType = "bilinear_interp" +let gSplit = "split" +let gShape = "shape" +let gFlatten = "flatten" +let gConvAddPreluType = "conv_add_prelu" +let gConvAddAddPreluType = "conv_add_add_prelu" +let gElementwiseAddPreluType = "elementwise_add_prelu" let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]), gBatchNormType : (inputs: ["X"], outputs: ["Y"]), gReluType : (inputs: ["X"], outputs: ["Out"]), - gElementwiseAdd : (inputs: ["X"], outputs: ["Out"]), + gElementwiseAddType : (inputs: ["X"], outputs: ["Out"]), gFeedType : (inputs: ["X"], outputs: ["Out"]), gFetchType : (inputs: ["X"], outputs: ["Out"]), gConvAddBatchNormReluType : (inputs: ["Input"], outputs: ["Out"]), gPooType : (inputs: ["X"], outputs: ["Out"]), gSoftmaxType : (inputs: ["X"], outputs: ["Out"]), gReshapeType : (inputs: ["X"], outputs: ["Out"]), - gConvAddType : (inputs: ["Input"], outputs: ["Out"])] + gConvAddType : (inputs: ["Input"], outputs: ["Out"]), + gDepthConvType : (inputs: ["Input"], outputs: ["Output"]), + gConcatType : (inputs: ["X"], outputs: ["Out"]), + gBoxcoderType : (inputs: ["PriorBox", "PriorBoxVar", "TargetBox"], outputs: ["OutputBox"]), + gTransposeType : (inputs: ["X"], outputs: ["Out"]), + gConvBnReluType : 
(inputs: ["Input"], outputs: ["Out"]), + gDwConvBnReluType : (inputs: ["Input"], outputs: ["Out"]), + gMulticlassNMSType : (inputs: ["BBoxes", "Scores"], outputs: ["Out"]), + gPriorBoxType : (inputs: ["Input", "Image"], outputs: ["Boxes", "Variances"]), + gPreluType : (inputs: ["X"], outputs: ["Out"]), + gConv2dTransposeType : (inputs: ["Input"], outputs: ["Output"]), + gBilinearInterpType : (inputs: ["X"], outputs: ["Out"]), + gSplit : (inputs: ["X"], outputs: ["Out"]), + gShape : (inputs: ["Input"], outputs: ["Out"]), + gFlatten : (inputs: ["X"], outputs: ["Out"]), + gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"]), + gConvAddAddPreluType : (inputs: ["Input"], outputs: ["Out"]), + gElementwiseAddPreluType : (inputs: ["X"], outputs: ["Out"]) + ] diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift index 3761dad60f0f8b20e3f95168445317a3e627ada9..9fc20f8a597d39d3b628c5e1033f9c5cceac45ed 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift @@ -1,62 +1,66 @@ -///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. */ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ import Foundation class BatchNormParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope) - inputBias = try BatchNormParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) - inputMean = try BatchNormParam.inputMean(inputs: opDesc.paraInputs, from: inScope) - inputScale = try BatchNormParam.inputScale(inputs: opDesc.paraInputs, from: inScope) - inputVariance = try BatchNormParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) - epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs) - momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs) - is_test = try BatchNormParam.getAttr(key: "is_test", attrs: opDesc.attrs) - } catch let error { - throw error - } + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope) + if input.transpose != [0, 2, 3, 1] { + fatalError("batch norm only accepts NHWC") + } + output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope) + bias = try BatchNormParam.getFirstTensor(key: "Bias", map: opDesc.paraInputs, from: inScope) + mean = try BatchNormParam.getFirstTensor(key: "Mean", map: opDesc.paraInputs, from: inScope) + scale = try BatchNormParam.getFirstTensor(key: "Scale", map: opDesc.paraInputs, from: inScope) + variance = try 
BatchNormParam.getFirstTensor(key: "Variance", map: opDesc.paraInputs, from: inScope) + epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs) + momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs) + } catch let error { + throw error } - let input: Texture

- var output: Texture

- let inputBias: Tensor - let inputMean: Tensor - let inputScale: Tensor - let inputVariance: Tensor - let epsilon: Float - let momentum: Float - let is_test: Bool + } + let input: Texture

+ var output: Texture

+ let bias: Tensor

+ let mean: Tensor

+ let scale: Tensor

+ let variance: Tensor

+ let epsilon: Float + let momentum: Float } class BatchNormOp: Operator, BatchNormParam

>, Runable, Creator, InferShaperable{ - func inferShape() { - para.output.dim = para.input.dim - } - typealias OpType = BatchNormOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } + typealias OpType = BatchNormOp

+ + func inferShape() { + para.output.dim = para.input.dim + } + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } + + func delogOutput() { + print(" \(type) output: ") + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) + } } - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..8db64ac3a473fe59e7821f11abeb3437c337459d --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift @@ -0,0 +1,68 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation + +class BilinearInterpParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope) + out_h = try BilinearInterpParam.getAttr(key: "out_h", attrs: opDesc.attrs) + out_w = try BilinearInterpParam.getAttr(key: "out_w", attrs: opDesc.attrs) + } catch let error { + throw error + } + if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) { + fatalError() + } + } + let input: Texture

+ var output: Texture

+ let out_h: Int + let out_w: Int +} + +class BilinearInterpOp: Operator, BilinearInterpParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = BilinearInterpOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) +// print(outputArray) + print(outputArray.strideArray()) + } + +} + + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..0e1d5f0c53128bbc2f0b5e94d2075eecdef0fcc6 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift @@ -0,0 +1,87 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation + +class BoxcoderParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope) + priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope) + targetBox = try BoxcoderParam.getFirstTensor(key: "TargetBox", map: opDesc.inputs, from: inScope) + output = try BoxcoderParam.getFirstTensor(key: "OutputBox", map: opDesc.outputs, from: inScope) + codeType = try BoxcoderParam.getAttr(key: "code_type", attrs: opDesc.attrs) + boxNormalized = try BoxcoderParam.getAttr(key: "box_normalized", attrs: opDesc.attrs) + } catch let error { + throw error + } + assert(priorBox.tensorDim.cout() == 2) + assert(priorBoxVar.tensorDim.cout() == 2) + assert(targetBox.tensorDim.cout() == 3) + assert(output.tensorDim.cout() == 3) + assert(priorBox.transpose == [0, 1, 2, 3]) + assert(priorBoxVar.transpose == [0, 1, 2, 3]) + assert(targetBox.transpose == [0, 1, 2, 3]) + assert(codeType == "decode_center_size") // encode_center_size is not implemented + assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1) + } + let priorBox: Texture

+ let priorBoxVar: Texture

+ let targetBox: Texture

+ var output: Texture

+ let codeType: String + let boxNormalized: Bool +} + +class BoxcoderOp: Operator, BoxcoderParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = BoxcoderOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + let device = para.output.metalTexture!.device + let pbv : [Float32] = device.texture2tensor(texture: para.priorBoxVar.metalTexture!, dim: para.priorBoxVar.tensorDim.dims, transpose: para.priorBoxVar.transpose) + let pb : [Float32] = device.texture2tensor(texture: para.priorBox.metalTexture!, dim: para.priorBox.tensorDim.dims, transpose: para.priorBox.transpose) + let tb : [Float32] = device.texture2tensor(texture: para.targetBox.metalTexture!, dim: para.targetBox.tensorDim.dims, transpose: para.targetBox.transpose) + let out : [Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(" prior box var ") + print(pbv.strideArray()) + print(" target box ") + print(tb.strideArray()) + print(" prior box ") + print(pb.strideArray()) + print(" output ") + print(out.strideArray()) + } + +} + + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..8ba74a1c31456d7cb6e9ad67974bc02055313958 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift @@ -0,0 +1,75 @@ +// +// CNNConvAddBatchNormReluOp.swift +// paddle-mobile + +import Foundation + +class CNNMPSConvTestParam: TestParam { + var outputTexture: MTLTexture? 
+ var metalParam: MetalConvParam + let filterPointer: UnsafeMutableRawPointer + let biasePointer: UnsafeMutablePointer + let filterSize: (width: Int, height: Int, channel: Int) + init(inMetalParam: MetalConvParam, inFilter: [Float], inBiase: [Float], inFilterSize: (width: Int, height: Int, channel: Int)) { + metalParam = inMetalParam + filterPointer = UnsafeMutableRawPointer.init(mutating: inFilter) + biasePointer = UnsafeMutablePointer.init(mutating: inBiase) + filterSize = inFilterSize + } +} + +@available(iOS 10.0, *) +class CNNMPSConvOp: Operator, CNNConvParam

>, Runable, Creator, InferShaperable, Fusion { + + typealias OpType = CNNMPSConvOp

+ + required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { + fatalError() + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + } + + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode-->Node.init(inType: gElementwiseAdd); + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gMPSCNNConvType + } + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0..: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + guard let xlist = opDesc.inputs["X"] else { + fatalError() + } + for x in xlist { + guard let variant = inScope[x], let v = variant as? Texture

else { + fatalError() + } + if transpose.count == 0 { + transpose = v.transpose + } + if v.transpose != transpose { + fatalError() + } + + input.append(v) + } + axis = try ConcatParam.getAttr(key: "axis", attrs: opDesc.attrs) + output = try ConcatParam.outputOut(outputs: opDesc.outputs, from: inScope) + } catch let error { + throw error + } + } + var input: [Texture

] = [] + var output: Texture

+ var transpose: [Int] = [] + let axis: Int +} + +class ConcatOp: Operator, ConcatParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = ConcatOp

+ + func inferShape() { + // let dim = para.input.reduce([0, 0]) {[$0[0] + $1.dim[0], $1.dim[1]]} + // para.output.dim = Dim.init(inDim: dim) + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) + } + +} + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddAddPreluOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..e5bded65a1a8944d337fea65995af79cab580105 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddAddPreluOp.swift @@ -0,0 +1,108 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation + +class ConvAddAddPreluParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvAddAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try ConvAddAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvAddAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvAddAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs) + groups = try ConvAddAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs) + alpha = try ConvAddAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) + mode = try ConvAddAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) + y = try ConvAddAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } + } + + let input: Texture

+ let y: Tensor + let filter: Tensor + let mode: String + let alpha: Tensor

+ var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int +} + +class ConvAddAddPreluOp: Operator, ConvAddAddPreluParam

>, Runable, Creator, InferShaperable, Fusion{ + typealias OpType = ConvAddAddPreluOp

+ + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gConvAddAddPreluType + } + + static func needCheck() -> [(Int, String)] { + return [(2, "Y"), (2, "X")] + } + + + + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0..: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs) - epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs) - - groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs) - variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) - bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) - scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) - mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) - y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: 
inScope) - } catch let error { - throw error - } + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + + filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs) + epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs) + + groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs) + variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) + bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) + + scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) + mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) + y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error } - - let input: Texture

- - let variance: Tensor - let bias: Tensor - let mean: Tensor - let scale: Tensor - let y: Tensor - let filter: Tensor - let epsilon: Float32 - var newScale: MTLBuffer? - var newBiase: MTLBuffer? - - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int + } + + let input: Texture

+ + let variance: Tensor + let bias: Tensor + let mean: Tensor + let scale: Tensor + let y: Tensor + let filter: Tensor + let epsilon: Float32 + var newScale: MTLBuffer? + var newBiase: MTLBuffer? + + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int } class ConvAddBatchNormReluOp: Operator, ConvAddBatchNormReluParam

>, Runable, Creator, InferShaperable, Fusion{ - typealias OpType = ConvAddBatchNormReluOp

+ + typealias OpType = ConvAddBatchNormReluOp

+ + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0.. Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode - --> Node.init(inType: gElementwiseAdd) - --> Node.init(inType: gBatchNormType) - --> Node.init(inType: gReluType) - return beginNode + outDim.append(filterDim[0]) + para.output.dim = Dim.init(inDim: outDim) + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } + + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gElementwiseAddType) + --> Node.init(inType: gBatchNormType) + --> Node.init(inType: gReluType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gConvAddBatchNormReluType + } + + func delogOutput() { + print(" conv add batchnorm relu output ") + print(para.output.toTensor().strideArray()) + // let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false) + // para.filter.logDataPointer(header: "filter data pointer: ") + // print("filter: \(para.filter)") - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } + // print("biase: \(para.y)") + // print("padding: \(para.paddings)") + // print("stride: \(para.stride)") - static func fusionType() -> String { - return gConvAddBatchNormReluType - } + // let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false) + // let _: P? 
= para.newBiase?.logDesc(header: "new biase: ", stridable: false) + // let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false) - func delogOutput() { - -// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false) -// para.filter.logDataPointer(header: "filter data pointer: ") -// print("filter: \(para.filter)") - -// print("biase: \(para.y)") -// print("padding: \(para.paddings)") -// print("stride: \(para.stride)") - -// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false) -// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false) -// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false) - - let output = para.output.metalTexture.floatArray { (p: P) -> P in - return p - } -// - writeToLibrary(fileName: "output_112x112x32_2", array: output) - print(" write done") - -// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false) - } + // let _: P? 
= para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift index 40069f6550ea00e986926f40c5fc2a2d4bf22a83..5e184844d886beb19ac5ff297f8a270af8a076fa 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift @@ -15,79 +15,102 @@ import Foundation class ConvAddParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope) - stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs) - groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs) - y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch let error { - throw error - } + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs) + groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs) + + y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch 
let error { + throw error } - - let input: Texture

- let y: Tensor - let filter: Tensor - - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int + } + + let input: Texture

+ let y: Tensor + let filter: Tensor + + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int } class ConvAddOp: Operator, ConvAddParam

>, Runable, Creator, InferShaperable, Fusion{ - static func fusionNode() -> Node { - let beginNode = Node.init(inType: gConvType) - _ = beginNode - --> Node.init(inType: gElementwiseAdd) - return beginNode - } - - static func change() -> [String : [(from: String, to: String)]] { - return [:] - } - - static func fusionType() -> String { - return gConvAddType - } + typealias OpType = ConvAddOp

+ + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gElementwiseAddType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gConvAddType + } + + func inferShape() { - typealias OpType = ConvAddOp

+ let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0..: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try ConvAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs) + groups = try ConvAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs) + alpha = try ConvAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) + mode = try ConvAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) + y = try ConvAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } + } + + let input: Texture

+ let y: Tensor + let filter: Tensor + let mode: String + let alpha: Tensor

+ var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int +} + +class ConvAddPreluOp: Operator, ConvAddPreluParam

>, Runable, Creator, InferShaperable, Fusion{ + typealias OpType = ConvAddPreluOp

+ + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gConvAddPreluType + } + + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0..: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvBNReluParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try ConvBNReluParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvBNReluParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvBNReluParam.getAttr(key: "dilations", attrs: opDesc.attrs) + epsilon = try ConvBNReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs) + + groups = try ConvBNReluParam.getAttr(key: "groups", attrs: opDesc.attrs) + variance = try ConvBNReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) + bias = try ConvBNReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) + scale = try ConvBNReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) + mean = try ConvBNReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } + } + + let input: Texture

+ + let variance: Tensor + let bias: Tensor + let mean: Tensor + let scale: Tensor + let filter: Tensor + let epsilon: Float32 + var newScale: MTLBuffer? + var newBiase: MTLBuffer? + + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int +} + +class ConvBNReluOp: Operator, ConvBNReluParam

>, Runable, Creator, InferShaperable, Fusion{ + typealias OpType = ConvBNReluOp

+ + func inputs() -> [Variant] { + return [para.input, para.variance, para.bias, para.mean, para.scale, para.filter] + } + + + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0.. Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gBatchNormType) + --> Node.init(inType: gReluType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gConvBnReluType + } + + func delogOutput() { + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift index 29b0c4246e728dbc3d3b865a189c7063ac1bbdcf..e82eb1f4753f0ebfdb5a949c85181a0ae52ea2da 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift @@ -15,74 +15,67 @@ import Foundation class ConvParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) - input = try ConvParam.input(inputs: opDesc.inputs, from: inScope) - output = try ConvParam.output(outputs: opDesc.outputs, from: inScope) - stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs) - paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) - dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) - groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs) - - } catch let error { - throw error - } + 
typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvParam.output(outputs: opDesc.outputs, from: inScope) + stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) + groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs) + + } catch let error { + throw error } - - let input: Texture

- let filter: Tensor - var output: Texture

- let stride: [Int32] - let paddings: [Int32] - let dilations: [Int32] - let groups: Int + } + + let input: Texture

+ let filter: Tensor + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int } class ConvOp: Operator, ConvParam

>, Runable, Creator, InferShaperable { - required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { - do { - try super.init(device: device, opDesc: opDesc, inScope: inScope) - } catch let error { - throw error - } - - } - func inferShape() { - let inDims = para.input.dim - let filterDim = para.filter.dim - let strides = para.stride - let paddings = para.paddings - let dilations = para.dilations - - var outDim = [inDims[0]] - for i in 0.. + + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations - typealias OpType = ConvOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } + var outDim = [inDims[0]] + for i in 0..: ConvParam

{ + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + try super.init(opDesc: opDesc, inScope: inScope) + } catch let error { + throw error + } + } +} + +class ConvTransposeOp: Operator, ConvTransposeParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = ConvTransposeOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + + print(" \(type) output: ") + let padToFourDim = para.output.padToFourDim + if para.output.transpose == [0, 1, 2, 3] { + let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) + print(outputArray.strideArray()) + } else if para.output.transpose == [0, 2, 3, 1] { + let output = para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])) + print(output.strideArray()) + } else { + print(" not implement") + } + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/DepthwiseConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/DepthwiseConvOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..ec76eecf1fc9736d9dff6a4cf0d69a314a9b1e0d --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/DepthwiseConvOp.swift @@ -0,0 +1,63 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class DepthConvOp: Operator, ConvParam

>, Runable, Creator, InferShaperable { + + typealias OpType = DepthConvOp

+ + required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { + do { + try super.init(device: device, opDesc: opDesc, inScope: inScope) + } catch let error { + throw error + } + } + + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0..: Operator, ConvBNReluParam

>, Runable, Creator, InferShaperable, Fusion{ + typealias OpType = ConvBNReluOp

+ + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0.. Node { + let beginNode = Node.init(inType: gDepthConvType) + _ = beginNode + --> Node.init(inType: gBatchNormType) + --> Node.init(inType: gReluType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gDwConvBnReluType + } + + func delogOutput() { + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift index 5ed36f86d79ffd639dc2ba76da74d24a532b1bd1..ae040dd65f74fc222275bc579338107f2ea188fd 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift @@ -15,33 +15,80 @@ import Foundation class ElementwiseAddParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) - inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) - - output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope) - axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs) - } catch let error { - throw error - } + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: 
inScope) + axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs) + } catch let error { + throw error } - let input: Texture

- let inputY: Tensor

- var output: Texture

- let axis: Int + do { + inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch _ { + let tensorY: Tensor

= try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) + let device = inputX.metalTexture!.device + inputY = Texture.init(device: device, inDim: tensorY.dim) + let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) + inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision) + } + +// required init(device: MTLDevice, param: ElementwiseAddParam

) { +// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) +// if computePrecision == .Float32 { +// super.init(device: device, inFunctionName: "elementwise_add") +// } else if computePrecision == .Float16 { +// super.init(device: device, inFunctionName: "elementwise_add_half") +// } else { +// fatalError() +// } +// } + + var offset = axis + if axis == -1 { + offset = inputX.tensorDim.cout() - inputY.tensorDim.cout() + } + for i in 0..<(inputY.tensorDim.cout()) { + assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i]) + } + } + + var inputX: Texture

+ var inputY: Texture

+ var output: Texture

+ var axis: Int } class ElementwiseAddOp: Operator, ElementwiseAddParam

>, Runable, Creator, InferShaperable{ - - func inferShape() { - para.output.dim = para.input.dim + typealias OpType = ElementwiseAddOp

+ + func inferShape() { +// para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } + + func delogOutput() { + print(" \(type) output: ") + print(para.output) - typealias OpType = ElementwiseAddOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + let padToFourDim = para.output.padToFourDim + if para.output.transpose == [0, 1, 2, 3] { + let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) + print(outputArray.strideArray()) + } else if para.output.transpose == [0, 2, 3, 1] { + print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + } else { + print(" not implement") } + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddPreluOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..333303e9bb7c1224ff50d69b5523edabe0fc81a6 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddPreluOp.swift @@ -0,0 +1,119 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation + +class ElementwiseAddPreluParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) + mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) + inputX = try ElementwiseAddPreluParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try ElementwiseAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope) + axis = try ElementwiseAddPreluParam.getAttr(key: "axis", attrs: opDesc.attrs) + } catch let error { + throw error + } + do { + inputY = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch _ { + let tensorY: Tensor

= try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) + let device = inputX.metalTexture!.device + inputY = Texture.init(device: device, inDim: tensorY.dim) + let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) + inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision) + } + + // required init(device: MTLDevice, param: ElementwiseAddParam

) { + // param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) + // if computePrecision == .Float32 { + // super.init(device: device, inFunctionName: "elementwise_add") + // } else if computePrecision == .Float16 { + // super.init(device: device, inFunctionName: "elementwise_add_half") + // } else { + // fatalError() + // } + // } + + var offset = axis + if axis == -1 { + offset = inputX.tensorDim.cout() - inputY.tensorDim.cout() + } + for i in 0..<(inputY.tensorDim.cout()) { + assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i]) + } + } + + let mode: String + let alpha: Tensor

+ var inputX: Texture

+ var inputY: Texture

+ var output: Texture

+ var axis: Int +} + +class ElementwiseAddPreluOp: Operator, ElementwiseAddPreluParam

>, Runable, Creator, InferShaperable, Fusion{ + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gElementwiseAddType) + _ = beginNode + --> Node.init(inType: gPreluType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gElementwiseAddPreluType + } + + typealias OpType = ElementwiseAddPreluOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + + + func delogOutput() { + print(" \(type) output: ") + print(para.output) + + let padToFourDim = para.output.padToFourDim + if para.output.transpose == [0, 1, 2, 3] { + let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) + print(outputArray.strideArray()) + } else if para.output.transpose == [0, 2, 3, 1] { + print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + } else { + print(" not implement") + } + } +} + + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift index c81d9e786c91408d2412b30eaec089904df75751..382ea58b844b25bb855ed7cdc155a860bca45da5 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift @@ -15,54 +15,53 @@ import Foundation class FeedParam: OpParam{ - var output: Texture

- var input: InputTexture { - return scope.input() as! InputTexture + var output: Texture

+ var input: InputTexture { + return scope.input() as! InputTexture + } + let scope: Scope + + required init(opDesc: OpDesc, inScope: Scope) throws { + scope = inScope + do { + output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope) + } catch let error { + throw error } - let scope: Scope - - required init(opDesc: OpDesc, inScope: Scope) throws { - scope = inScope - do { - output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error - } - } - - typealias ParamPrecisionType = P + } + + typealias ParamPrecisionType = P } class FeedOp: Operator, FeedParam

>, Runable, Creator, InferShaperable { - typealias OpType = FeedOp

- - func inferShape() { - // print("feed input: \(para.input.expectDim)") - print("feed output: \(para.output.dim)") - // para.output.dim = - // para.output.dim = para.input.expectDim - } - - func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - -// let resizeKernel = ResizeKernel

.init(device: device) -// let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim) -// do { -// try resizeKernel.compute(commandBuffer: buffer, param: resizeParam) -// } catch let error { -// throw error -// } + typealias OpType = FeedOp

+ + func inferShape() { + // print("feed input: \(para.input.expectDim)") + print("feed output: \(para.output.dim)") + // para.output.dim = + // para.output.dim = para.input.expectDim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } - func delogOutput() { -// para.input.mtlTexture.logDesc() -// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true) -// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false) - } + // let resizeKernel = ResizeKernel

.init(device: device) + // let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim) + // do { + // try resizeKernel.compute(commandBuffer: buffer, param: resizeParam) + // } catch let error { + // throw error + // } + } + + func delogOutput() { + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift index 2964b89e5ddabbbbd4f2df032efa5ef2db82ec96..ade5b09099b69f4784b33a3b108cfcfe1aa1ea7f 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift @@ -15,40 +15,73 @@ import Foundation class FetchParam: OpParam{ - var output: Texture

- let input: Texture

- let scope: Scope - required init(opDesc: OpDesc, inScope: Scope) throws { - scope = inScope - do { - input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) - output = input - } catch let error { - throw error - } + var output: FetchHolder + let input: Texture

+ let scope: Scope + required init(opDesc: OpDesc, inScope: Scope) throws { + scope = inScope + do { + input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) + output = FetchHolder.init(inCapacity: input.numel(), inDim: input.tensorDim.dims) + scope.setOutput(output: output) + } catch let error { + throw error } - - typealias ParamPrecisionType = P + } + + typealias ParamPrecisionType = P } class FetchKernel: Kernel, Computable { - - func compute(commandBuffer: MTLCommandBuffer, param: FetchParam

) throws { + + func compute(commandBuffer: MTLCommandBuffer, param: FetchParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } - - required init(device: MTLDevice, param: FetchParam

) { - super.init(device: device, inFunctionName: "texture2d_to_2d_array") + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: FetchParam

) { + param.output.initBuffer(device: device) + if computePrecision == .Float16 { + if param.input.transpose == [0, 2, 3, 1] { + super.init(device: device, inFunctionName: "fetch_half") + } else { +// fatalError(" not support ") + super.init(device: device, inFunctionName: "fetch_placeholder_half") + print(" not support ") + } + } else if computePrecision == .Float32 { + if param.input.transpose == [0, 2, 3, 1] { + super.init(device: device, inFunctionName: "fetch") + } else { + print(" not support ") + super.init(device: device, inFunctionName: "fetch_placeholder") +// fatalError(" not support ") + } + } else { + fatalError(" not support ") } + } } -class FetchOp: Operator< FetchKernel

, FetchParam

>, Runable, Creator, InferShaperable{ - func inferShape() { - print(para.input.dim) - } - - typealias OpType = FetchOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - scope.setOutput(output: para.output) +class FetchOp: Operator< FetchKernel

, FetchParam

>, Runable, Creator, InferShaperable { + + typealias OpType = FetchOp

+ + func inferShape() { + print(para.input.dim) + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FlattenOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FlattenOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..4fc5f222932ce98c4bf3e29bdf6cd8c666f5f9f1 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/FlattenOp.swift @@ -0,0 +1,63 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class FlattenParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope) + axis = try FlattenParam.getAttr(key: "axis", attrs: opDesc.attrs) + } catch let error { + throw error + } + } + let input: Texture

+ var output: Texture

+ let axis: Int +} + + +class FlattenOp: Operator, FlattenParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = FlattenOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) + } + +} + + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..f58358761f820809685510fa4e9b5ff237567b3c --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift @@ -0,0 +1,94 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Metal +import Foundation + +public protocol TestParam { +} + +public protocol Testable { + associatedtype TestParamType: TestParam + func test(commandBuffer: MTLCommandBuffer, param: TestParamType) + init(device: MTLDevice, testParam: TestParamType) +} + + +protocol Computable { + associatedtype ParamType: OpParam + func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws + init(device: MTLDevice, param: ParamType) +} + +protocol KernelProtocol { + var pipline: MTLComputePipelineState { get set } + var functionName: String { get set } + +} + +open class Kernel { + let pipline: MTLComputePipelineState + let functionName: String + public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) { + pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib) + functionName = inFunctionName + } +} + +open class CusomKernel: Kernel { + public struct Shape { + public let width: Int + public let height: Int + public let channel: Int + public init(inWidth: Int, inHeight: Int, inChannel: Int){ + width = inWidth + height = inHeight + channel = inChannel + } + } + public let outputTexture: MTLTexture + public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) { + let textureDesc = MTLTextureDescriptor.init() + textureDesc.textureType = .type2D + textureDesc.width = outputDim.width + textureDesc.height = outputDim.height + textureDesc.depth = (outputDim.channel + 3) / 4 + + if computePrecision == .Float16 { + textureDesc.pixelFormat = .rgba16Float + } else if computePrecision == .Float32 { + textureDesc.pixelFormat = .rgba32Float + } else { + fatalError() + } + + textureDesc.usage = [.shaderRead, .shaderWrite] + textureDesc.storageMode = .shared + outputTexture = device.makeTexture(descriptor: textureDesc) ?! 
" make texture error " + + super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib) + } + + public func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(inputTexuture, index: 0) + encoder.setTexture(outputTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: outputTexture) + encoder.endEncoding() + } + +} + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift index bae452dec331957ceda5a6f503802352f63a6dbe..dad8d0c6ac2e5a93273573473c700179f8b90a37 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift @@ -15,53 +15,39 @@ import Foundation class BatchNormKernel: Kernel, Computable { - var newScale: MTLBuffer - var newBias: MTLBuffer - - required init(device: MTLDevice, param: BatchNormParam

) { - guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else { - fatalError() - } - guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else { - fatalError() - } - self.newScale = newScale - self.newBias = newBias - - super.init(device: device, inFunctionName: "batchnorm") - - let varianceBuffer : MTLBuffer = param.inputVariance.buffer - - var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length) - let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self) - for i in 0..<(varianceBuffer.length / MemoryLayout

.stride) { - invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot() - } - - let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self) - let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self) - let scale : MTLBuffer = param.inputScale.buffer - let scaleContents = scale.contents().assumingMemoryBound(to: P.self) - let bias : MTLBuffer = param.inputBias.buffer - let biasContents = bias.contents().assumingMemoryBound(to: P.self) - let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self) - - for i in 0..<(newScale.length / MemoryLayout

.stride) { - newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i])) - newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i])) - } + required init(device: MTLDevice, param: BatchNormParam

) { + let count = param.variance.dim.numel() + let varianceP = param.variance.data.pointer + let meanP = param.mean.data.pointer + let scaleP = param.scale.data.pointer + let biasP = param.bias.data.pointer + for i in 0..) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - print("BatchNorm compute") - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBuffer(newScale, offset: 0, index: 0) - encoder.setBuffer(newBias, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + + param.bias.initBuffer(device: device, precision: computePrecision) + param.scale.initBuffer(device: device, precision: computePrecision) + param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "batchnorm") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "batchnorm_half") + } else { + fatalError() + } + } + + func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBuffer(param.scale.buffer, offset: 0, index: 0) + encoder.setBuffer(param.bias.buffer, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..fca5719553038732b1646fb8b15885bd03bd5624 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift @@ -0,0 +1,91 @@ +// +// BatchNormRelu.swift +// paddle-mobile +// +// Created by zhangxinjun on 2018/8/23. +// Copyright © 2018年 orange. All rights reserved. +// + +import Foundation + + +class BatchNormReluParam: BatchNormParam

{ + +} + +class BatchNormReluKernel: Kernel, Computable{ + + + typealias ParamType = BatchNormReluParam

+ var newScale: MTLBuffer + var newBias: MTLBuffer + + required init(device: MTLDevice, testParam: BatchNormReluTestParam) { + + newScale = testParam.newScaleBuffer + newBias = testParam.newBiaseBuffer + + super.init(device: device, inFunctionName: "batch_norm_relu_3x3") + } + + required init(device: MTLDevice, param: BatchNormReluParam

) { + guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else { + fatalError() + } + guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else { + fatalError() + } + self.newScale = newScale + self.newBias = newBias + + super.init(device: device, inFunctionName: "batch_norm_relu_3x3") + + + let varianceBuffer : MTLBuffer = param.inputVariance.buffer + + var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length) + let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self) + for i in 0..<(varianceBuffer.length / MemoryLayout

.stride) { + invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot() + } + + let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self) + let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self) + let scale : MTLBuffer = param.inputScale.buffer + let scaleContents = scale.contents().assumingMemoryBound(to: P.self) + let bias : MTLBuffer = param.inputBias.buffer + let biasContents = bias.contents().assumingMemoryBound(to: P.self) + let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self) + + for i in 0..<(newScale.length / MemoryLayout

.stride) { + newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i])) + newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i])) + } + } + + func compute(commandBuffer: MTLCommandBuffer, param: BatchNormReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() + } + encoder.setTexture(param.input as? MTLTexture, index: 0) + encoder.setTexture(param.output as? MTLTexture, index: 1) + encoder.setBuffer(newScale, offset: 0, index: 1) + encoder.setBuffer(newBias, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output as! MTLTexture) + encoder.endEncoding() + } + + func testCompute(commandBuffer: MTLCommandBuffer, testParam: BatchNormReluTestParam) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() + } + encoder.setTexture(testParam.inputTexture, index: 0) + encoder.setTexture(testParam.outputTexture, index: 1) + encoder.setBuffer(newScale, offset: 0, index: 0) + encoder.setBuffer(newBias, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture) + encoder.endEncoding() + } + + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..7f3e7433760cc1fa4d093b08027bce7c79172532 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift @@ -0,0 +1,55 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation + +struct BilinearInterpMetalParam { + var ratio_h: Float32 + var ratio_w: Float32 +} + +class BilinearInterpKernel: Kernel, Computable{ + func compute(commandBuffer: MTLCommandBuffer, param: BilinearInterpParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + var ratio_h: Float32 = 0 + var ratio_w: Float32 = 0 + if param.output.tensorDim.dims[2] > 1 { + ratio_h = Float32(param.input.tensorDim.dims[2]-1) / Float32(param.output.tensorDim.dims[2]-1) + } + if param.output.tensorDim.dims[3] > 1 { + ratio_w = Float32(param.input.tensorDim.dims[3]-1) / Float32(param.output.tensorDim.dims[3]-1) + } + var p = BilinearInterpMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w) + encoder.setBytes(&p, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: BilinearInterpParam

) { + param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "bilinear_interp_float") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "bilinear_interp_half") + } else { + fatalError() + } + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..c084d9b28e1dc7019a14d3ae317ddf8a64547830 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift @@ -0,0 +1,46 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct BoxcoderMetalParam { +} + +class BoxcoderKernel: Kernel, Computable{ + func compute(commandBuffer: MTLCommandBuffer, param: BoxcoderParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(param.priorBox.metalTexture, index: 0) + encoder.setTexture(param.priorBoxVar.metalTexture, index: 1) + encoder.setTexture(param.targetBox.metalTexture, index: 2) + encoder.setTexture(param.output.metalTexture, index: 3) + var bmp = BoxcoderMetalParam.init() + encoder.setBytes(&bmp, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: BoxcoderParam

) { + param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2], computePrecision: computePrecision) + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "boxcoder_float") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "boxcoder_half") + } else { + fatalError() + } + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..14a5bd521455632c8a67e4c1a8ebdedc6c460aa5 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift @@ -0,0 +1,176 @@ +// +// CNNConvKernel.swift +// paddle-mobile +// + +import Foundation +import Metal +import Accelerate +import MetalPerformanceShaders + +@available(iOS 10.0, *) +class WeightsDataSource: NSObject, MPSCNNConvolutionDataSource { + + let desc: MPSCNNConvolutionDescriptor + let weight:UnsafeMutableRawPointer + let bias:UnsafeMutablePointer + + + + init(inDesc: MPSCNNConvolutionDescriptor, inWeight: UnsafeMutableRawPointer, inBias: UnsafeMutablePointer) { + desc = inDesc + weight = inWeight + bias = inBias + } + + + func dataType() -> MPSDataType { + return .float32 + } + + func descriptor() -> MPSCNNConvolutionDescriptor { + return desc + } + + func weights() -> UnsafeMutableRawPointer { + return self.weight + } + + func biasTerms() -> UnsafeMutablePointer? { + return self.bias + } + + func load() -> Bool { + return true + } + + func purge() { + } + + func label() -> String? 
{ + return "Conv" + } + + +} + +@available(iOS 10.0, *) +class CNNConvParam: OpParam{ + + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try CNNConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try CNNConvParam.input(inputs: opDesc.inputs, from: inScope) + output = try CNNConvParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try CNNConvParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try CNNConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) + // 暂时不用关心 + dilations = try CNNConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) + // 暂时不用关心 + groups = try CNNConvParam.getAttr(key: "groups", attrs: opDesc.attrs) + + variance = try CNNConvParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) + // bias + y = try CNNConvParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } + } + + var input: Texture

+ let variance: Tensor + let y: Tensor + let filter: Tensor + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int +} + +@available(iOS 10.0, *) +class CNNConvKernel: Kernel, Computable { + + typealias ParamType = CNNConvParam

+ + var mpsImageCreator: MpsImageCreator

? + var activation:MPSCNNNeuron? + var conv:MPSCNNConvolution? + var weightDataSource:WeightsDataSource? + var param: CNNConvParam

? + var device: MTLDevice? + + + required init(device:MTLDevice, testParam:CNNMPSConvTestParam) { + self.device = device + + let desc = MPSCNNConvolutionDescriptor(kernelWidth: testParam.filterSize.width, kernelHeight: testParam.filterSize.height, inputFeatureChannels: testParam.filterSize.channel, outputFeatureChannels: testParam.filterSize.channel, neuronFilter: activation) + + desc.strideInPixelsX = Int(testParam.metalParam.offsetX) + desc.strideInPixelsY = Int(testParam.metalParam.offsetY) + + + weightDataSource = WeightsDataSource(inDesc: desc, inWeight:testParam.filterPointer, inBias:testParam.biasePointer) + + if #available(iOS 11.0, *) { + conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!) + } else { + // Fallback on earlier versions + } + + super.init(device: device, inFunctionName: "") + } + + required init(device:MTLDevice, param:CNNConvParam

) { + + self.device = device + + let inChannels: Int + let outChannels: Int + + if param.y.dim.cout() == 4 { + inChannels = (param.y.dim[3]) + outChannels = inChannels + } else { + inChannels = 0 + outChannels = inChannels + } + + let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.width, kernelHeight: param.filter.height, inputFeatureChannels: inChannels, outputFeatureChannels: outChannels, neuronFilter: activation) + + desc.strideInPixelsX = Int(param.stride[0]) + desc.strideInPixelsY = Int(param.stride[1]) + + + weightDataSource = WeightsDataSource(inDesc: desc, inWeight:param.filter.data.pointer as! UnsafeMutablePointer, inBias: param.y.data.pointer as! UnsafeMutablePointer) + + if #available(iOS 11.0, *) { + conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!) + } else { + // Fallback on earlier versions + } + + super.init(device: device, inFunctionName: "") + } + + func compute(commandBuffer: MTLCommandBuffer, param: CNNConvParam

) throws { + let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))! + var outputImage = (mpsImageCreator?.createMPSImage(device: device!))! + + // 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用 + conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage) + + param.input = outputImage.texture as! Texture

+ } + + func testCompute(commandBuffer: MTLCommandBuffer, testParam: CNNMPSConvTestParam) throws { + let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))! + var outputImage = (mpsImageCreator?.createMPSImage(device: device!))! + + // 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用 + conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage) + + testParam.outputTexture = outputImage.texture + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Concat.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Concat.swift new file mode 100644 index 0000000000000000000000000000000000000000..25f0a21bfff420566d06a59dca626805dd0ce6e0 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Concat.swift @@ -0,0 +1,31 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class ConcatKernel: Kernel, Computable{ + func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: ConcatParam

) { + super.init(device: device, inFunctionName: "concat") + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..81ef46c0b3e919615d07f667851007e95b02d54f --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift @@ -0,0 +1,147 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct ConcatTestParam: TestParam { + var input: [MTLTexture] + var output: MTLTexture + var dims: [[Int]] + var axis: Int + var odim: [Int] +} + +struct ConcatMetalParam { + var odim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1) + var axis: Int32 = 0 + var offset: Int32 = 0 + var trans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) + var vdim: (Int32, Int32, Int32, Int32, Int32, Int32) = (0, 0, 0, 0, 0, 0) +} + +class ConcatKernel: Kernel, Computable{ + var v = "normal" + var pm = ConcatMetalParam.init() + func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam

) throws { + + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + let num = param.input.count + for i in 0...size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: ConcatParam

) { + param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: computePrecision) + let orank = param.output.tensorDim.cout() + let num = param.input.count + assert(num <= 6) + var axis = 4 - param.output.tensorDim.cout() + param.axis + for i in 0..<4 { + if param.transpose[i] == axis { + axis = i + break + } + } + pm.axis = Int32(axis) + pm.odim = (Int32(param.output.dim[0]), Int32(param.output.dim[1]), Int32(param.output.dim[2]), Int32(param.output.dim[3])) + pm.trans = (Int32(param.output.transpose[0]), Int32(param.output.transpose[1]), Int32(param.output.transpose[2]), Int32(param.output.transpose[3])) + var vdim: [Int] = [0, 0, 0, 0, 0, 0] + for i in 0..: Kernel, Computable { + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvAddAddPreluParam

) { + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + param.filter.initBuffer(device: device, precision: computePrecision) + param.y.initBuffer(device: device, precision: computePrecision) + param.alpha.initBuffer(device: device, precision: computePrecision) + + if computePrecision == .Float16 { + if param.filter.width == 1 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half") + } + + } else if param.filter.channel == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half") + } + } else if param.filter.width == 3 && param.filter.height == 3 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half") + } + + } else if param.filter.width == 1 && param.filter.height == 5 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half") + } + } else if param.filter.width == 5 && param.filter.height == 1 { + if param.mode == 
"channel" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half") + } + } else { + fatalError(" unsupport yet ") + } + } else if computePrecision == .Float32 { + if param.filter.width == 1 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float") + } + } else if param.filter.channel == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float") + } + } else if param.filter.width == 3 && param.filter.height == 3 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float") + } + + } else if param.filter.width == 1 && param.filter.height == 5 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float") + } + } else if param.filter.width == 5 && 
param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float") + } + } else { + fatalError(" unsupport yet ") + } + } else { + fatalError() + } + + let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1]) + + let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0]) + + // print(" function: \(functionName)") + // print("offset x: \(offsetX)") + // print("offset y: \(offsetY)") + + let offsetZ = 0.0 + let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) + // print("metal param: ") + // print(inMetalParam) + + metalParam = inMetalParam + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddAddPreluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift index 0ffe90272fe36fa30d58c7c6bd1e287d49f0e92a..66324dd47086fd7c1ccffb674c0f8b8623416e0d 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift @@ -15,124 +15,165 @@ import Foundation struct ConvAddBatchNormReluTestParam: TestParam { - let inputTexture: MTLTexture - let outputTexture: MTLTexture - var metalParam: MetalConvParam - let filterBuffer: MTLBuffer - let biaseBuffer: MTLBuffer - let newScaleBuffer: MTLBuffer - let newBiaseBuffer: MTLBuffer - let filterSize: (width: Int, height: Int, channel: Int) - init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { - inputTexture = inInputTexture - outputTexture = inOutputTexture - metalParam = inMetalParam - filterBuffer = inFilterBuffer - biaseBuffer = inBiaseBuffer - newScaleBuffer = inNewScaleBuffer - newBiaseBuffer = inNewBiaseBuffer - filterSize = inFilterSize - } + let 
inputTexture: MTLTexture + let outputTexture: MTLTexture + var metalParam: MetalConvParam + let filterBuffer: MTLBuffer + let biaseBuffer: MTLBuffer + let newScaleBuffer: MTLBuffer + let newBiaseBuffer: MTLBuffer + let filterSize: (width: Int, height: Int, channel: Int) + init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { + inputTexture = inInputTexture + outputTexture = inOutputTexture + metalParam = inMetalParam + filterBuffer = inFilterBuffer + biaseBuffer = inBiaseBuffer + newScaleBuffer = inNewScaleBuffer + newBiaseBuffer = inNewBiaseBuffer + filterSize = inFilterSize + } } class ConvAddBatchNormReluKernel: Kernel, Computable, Testable { - required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) { - if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") - } else if testParam.filterSize.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") - } else { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") - } + required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) { + if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") + } else if testParam.filterSize.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") + } else { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") } + } + + var metalParam: MetalConvParam! + + required init(device: MTLDevice, param: ConvAddBatchNormReluParam

) { + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + param.filter.initBuffer(device: device, precision: computePrecision) + param.y.initBuffer(device: device, precision: computePrecision) + param.variance.initBuffer(device: device, precision: .Float32) + param.mean.initBuffer(device: device, precision: .Float32) + param.scale.initBuffer(device: device, precision: .Float32) + param.bias.initBuffer(device: device, precision: .Float32) - var metalParam: MetalConvParam! - - required init(device: MTLDevice, param: ConvAddBatchNormReluParam

) { - - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") - } else { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") - } - - param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - - param.variance.initBuffer(device: device) - param.mean.initBuffer(device: device) - param.scale.initBuffer(device: device) - param.bias.initBuffer(device: device) - - let offsetX = param.filter.width/2 - Int(param.paddings[0]) - let offsetY = param.filter.height/2 - Int(param.paddings[1]) - - print("offset x: \(offsetX)") - print("offset y: \(offsetY)") - - let offsetZ = 0.0 - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) - - var invs: [P] = [] - let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) - - for i in 0...stride { - let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) - invs.append(P(inv)) - } - - let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) - let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) - - let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) - let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) - let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) - for i in 0...stride { - newScale[i] = invs[i] * scaleContents[i] - newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] - } - param.newBiase = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length) - param.newScale = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length) - - newScale.deinitialize(count: param.scale.buffer.length) - newScale.deallocate() - - newBiase.deinitialize(count: param.bias.buffer.length) - newBiase.deallocate() + if computePrecision == .Float32 { + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") + } else if param.filter.width == 3 && param.filter.height == 3 { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") + } else { + fatalError(" unsupport ") + } + } else if computePrecision == .Float16 { + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1_half") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3_half") + } else if param.filter.width == 3 && param.filter.height == 3 { + super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3_half") + } else { + fatalError(" unsupport ") + } + } else { + fatalError() } - func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.y.buffer, offset: 0, index: 2) - encoder.setBuffer(param.newScale!, offset: 0, index: 3) - encoder.setBuffer(param.newBiase!, offset: 0, index: 4) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + let offsetX = param.filter.width/2 - Int(param.paddings[0]) + let offsetY = param.filter.height/2 - Int(param.paddings[1]) + + print("offset x: \(offsetX)") + print("offset y: \(offsetY)") + + let offsetZ = 0.0 + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) + + var invs: [P] = [] + let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) + + for i in 0...stride { + let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) + invs.append(P(inv)) + } + + let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) + let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) + + let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) + let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) + let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) + for i in 0...stride { + newScale[i] = invs[i] * scaleContents[i] + newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] } - public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - fatalError() - } - - encoder.setTexture(param.inputTexture, index: 0) - encoder.setTexture(param.outputTexture, index: 1) - var inMetalParam = param.metalParam - encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) - encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2) - encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3) - encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4) - encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) - encoder.endEncoding() +// var newScaleFP16: UnsafeMutableRawPointer +// +// float32ToFloat16(input: newScale as! UnsafeMutablePointer, output: newScaleFP16, count: param.scale.buffer.length / MemoryLayout

.size) + + +// let newBiaseFloat16 = device.makeBuffer(length: <#T##Int#>, options: <#T##MTLResourceOptions#>) + + var newBiaseBuffer: MTLBuffer + var newScaleBuffer: MTLBuffer + + if computePrecision == .Float32 { + newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! + newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! + } else if computePrecision == .Float16 { + + newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! + newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! + + float32ToFloat16(input: newBiase as! UnsafeMutablePointer, output: newBiaseBuffer.contents(), count: param.bias.buffer.length / MemoryLayout

.size) + + float32ToFloat16(input: newScale as! UnsafeMutablePointer, output: newScaleBuffer.contents(), count: param.scale.buffer.length / MemoryLayout

.size) + } else { + fatalError(" unsupport ") } + + param.newBiase = newBiaseBuffer + param.newScale = newScaleBuffer + + newScale.deinitialize(count: param.scale.buffer.length) + newScale.deallocate() + + newBiase.deinitialize(count: param.bias.buffer.length) + newBiase.deallocate() + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.setBuffer(param.newScale!, offset: 0, index: 3) + encoder.setBuffer(param.newBiase!, offset: 0, index: 4) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() + } + + encoder.setTexture(param.inputTexture, index: 0) + encoder.setTexture(param.outputTexture, index: 1) + var inMetalParam = param.metalParam + encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) + encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2) + encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3) + encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4) + encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) + encoder.endEncoding() + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift index 81f3aacba8dded3341237e05f9afbc1e04f70596..d5aa98d2606ceda5cbcf0f3f4c1fc0ed2adeed25 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift @@ -15,33 +15,73 @@ import Foundation class ConvAddKernel: Kernel, Computable { - var metalParam: MetalConvParam! 
- required init(device: MTLDevice, param: ConvAddParam

) { + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvAddParam

) { + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + param.filter.initBuffer(device: device, precision: computePrecision) + param.y.initBuffer(device: device, precision: computePrecision) + + if computePrecision == .Float16 { + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_add_1x1_half") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half") + } else if param.filter.width == 3 && param.filter.height == 3 { + super.init(device: device, inFunctionName: "conv_add_3x3_half") + } else if param.filter.width == 1 && param.filter.height == 5 { + super.init(device: device, inFunctionName: "conv_add_5x1_half") + } else if param.filter.width == 5 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_add_1x5_half") + } else { + fatalError(" unsupport yet ") + } + } else if computePrecision == .Float32 { + if param.filter.width == 1 && param.filter.height == 1 { super.init(device: device, inFunctionName: "conv_add_1x1") - let offsetX = param.filter.width/2 - Int(param.paddings[0]) - let offsetY = param.filter.height/2 - Int(param.paddings[1]) - - param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - - print("offset x: \(offsetX)") - print("offset y: \(offsetY)") - - let offsetZ = 0.0 - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3") + } else if param.filter.width == 1 && param.filter.height == 5 { + super.init(device: device, inFunctionName: 
"conv_add_5x1") + } else if param.filter.width == 5 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_add_1x5") + } else if param.filter.width == 3 && param.filter.height == 3 { + super.init(device: device, inFunctionName: "conv_add_3x3") + } else { + fatalError(" unsupport yet ") + } + } else { + fatalError() } - func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.y.buffer, offset: 0, index: 2) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + + + let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1]) + + let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0]) + +// print(" function: \(functionName)") +// print("offset x: \(offsetX)") +// print("offset y: \(offsetY)") + + let offsetZ = 0.0 + let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) +// print("metal param: ") +// print(inMetalParam) + + metalParam = inMetalParam + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..35d49953c656364799e8ca7400ef4bac445200a0 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift @@ -0,0 +1,150 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class ConvAddPreluKernel: Kernel, Computable { + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvAddPreluParam

) { + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + param.filter.initBuffer(device: device, precision: computePrecision) + param.y.initBuffer(device: device, precision: computePrecision) + param.alpha.initBuffer(device: device, precision: computePrecision) + + if computePrecision == .Float16 { + if param.filter.width == 1 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half") + } + + } else if param.filter.channel == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half") + } + } else if param.filter.width == 3 && param.filter.height == 3 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half") + } + + } else if param.filter.width == 1 && param.filter.height == 5 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half") + } + } else if param.filter.width == 5 && param.filter.height == 1 { + if param.mode == 
"channel" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half") + } + } else { + fatalError(" unsupport yet ") + } + } else if computePrecision == .Float32 { + if param.filter.width == 1 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float") + } + } else if param.filter.channel == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float") + } + } else if param.filter.width == 3 && param.filter.height == 3 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float") + } + + } else if param.filter.width == 1 && param.filter.height == 5 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float") + } + } else if param.filter.width == 5 && 
param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float") + } + } else { + fatalError(" unsupport yet ") + } + } else { + fatalError() + } + + let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1]) + + let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0]) + + // print(" function: \(functionName)") + // print("offset x: \(offsetX)") + // print("offset y: \(offsetY)") + + let offsetZ = 0.0 + let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) + // print("metal param: ") + // print(inMetalParam) + + metalParam = inMetalParam + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddPreluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..e79f8f9be37c2575b28aef2e9169ab814c9587fe --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift @@ -0,0 +1,180 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation +import MetalPerformanceShaders + +struct ConvBNReluTestParam: TestParam { + let inputTexture: MTLTexture + let outputTexture: MTLTexture + var metalParam: MetalConvParam + let filterBuffer: MTLBuffer + let biaseBuffer: MTLBuffer + let newScaleBuffer: MTLBuffer + let newBiaseBuffer: MTLBuffer + let filterSize: (width: Int, height: Int, channel: Int) + init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { + + inputTexture = inInputTexture + outputTexture = inOutputTexture + metalParam = inMetalParam + filterBuffer = inFilterBuffer + biaseBuffer = inBiaseBuffer + newScaleBuffer = inNewScaleBuffer + newBiaseBuffer = inNewBiaseBuffer + filterSize = inFilterSize + } +} + +class ConvBNReluKernel: Kernel, Computable, Testable { + required init(device: MTLDevice, testParam: ConvBNReluTestParam) { + if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") + } else if testParam.filterSize.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") + } else { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") + } + } + + var metalParam: MetalConvParam! + + required init(device: MTLDevice, param: ConvBNReluParam

) { + + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + param.filter.initBuffer(device: device, precision: computePrecision) + param.variance.initBuffer(device: device, precision: .Float32) + param.mean.initBuffer(device: device, precision: .Float32) + param.scale.initBuffer(device: device, precision: .Float32) + param.bias.initBuffer(device: device, precision: .Float32) + + if computePrecision == .Float32 { + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") + } else if param.filter.width == 3 && param.filter.height == 3 { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") + } else { + fatalError(" unsupport ") + } + } else if computePrecision == .Float16 { + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half") + } else if param.filter.width == 3 && param.filter.height == 3 { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half") + } else { + fatalError(" unsupport ") + } + } else { + fatalError() + } + + + + let offsetX = param.filter.width/2 - Int(param.paddings[0]) + let offsetY = param.filter.height/2 - Int(param.paddings[1]) + +// print(" param filter width: \(param.filter.width)") +// print(" param filter height: \(param.filter.height)") +// +// print(" param paddings: \(param.paddings)") +// +// print("ConvBNReluKernel offset x: \(offsetX)") +// print("ConvBNReluKernel offset y: \(offsetY)") + + let offsetZ = 0.0 + + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: 
UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) + + var invs: [P] = [] + let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) + + for i in 0...stride { + let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) + invs.append(P(inv)) + } + + let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) + let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) + + let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) + let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) + let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) + for i in 0...stride { + newScale[i] = invs[i] * scaleContents[i] + newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] + } + + var newBiaseBuffer: MTLBuffer + var newScaleBuffer: MTLBuffer + + if computePrecision == .Float32 { + newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! + newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! + } else if computePrecision == .Float16 { + + newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! + newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! + + float32ToFloat16(input: newBiase as! UnsafeMutablePointer, output: newBiaseBuffer.contents(), count: param.bias.buffer.length / MemoryLayout

.size) + + float32ToFloat16(input: newScale as! UnsafeMutablePointer, output: newScaleBuffer.contents(), count: param.scale.buffer.length / MemoryLayout

.size) + } else { + fatalError(" unsupport ") + } + + param.newBiase = newBiaseBuffer + param.newScale = newScaleBuffer + + newScale.deinitialize(count: param.scale.buffer.length) + newScale.deallocate() + + newBiase.deinitialize(count: param.bias.buffer.length) + newBiase.deallocate() + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvBNReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.newScale!, offset: 0, index: 2) + encoder.setBuffer(param.newBiase!, offset: 0, index: 3) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + public func test(commandBuffer: MTLCommandBuffer, param: ConvBNReluTestParam) { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() + } + + encoder.setTexture(param.inputTexture, index: 0) + encoder.setTexture(param.outputTexture, index: 1) + var inMetalParam = param.metalParam + encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) + encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 2) + encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 3) + encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) + encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal deleted file mode 100644 index 9d0c6de35ed23b14a05a9c3e6398931556d535a0..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal +++ /dev/null @@ -1,400 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include -using namespace metal; - -struct MetalConvParam { - short offsetX; - short offsetY; - short offsetZ; - ushort strideX; - ushort strideY; -}; - - -kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - half4 output = half4(0.0); - - half4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - half4 
weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - - output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0)); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - half4 output = half4(0.0); - - half4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = inTexture.sample(sample, 
float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0)); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_add_1x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - half4 output = half4(0.0); - - half4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - half4 weight_z 
= weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - output = output + biase[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - -kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - half4 output = half4(0.0); - half4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] = 
inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - half4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0)); - outTexture.write(output, gid.xy, gid.z); -} - - -/*---------------------------------------------*/ - - - -kernel void conv_add_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - 
output.z += dot(input, weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input[9]; - for (uint i = 0; i < input_arr_size; ++i) { - input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); - input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); - input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); - input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); - input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); - input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); - input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); - input[8] = 
inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); - for (int j = 0; j < 9; ++j) { - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.x += dot(input[j], weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.y += dot(input[j], weight_y); - - float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.z += dot(input[j], weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; - output.w += dot(input[j], weight_w); - } - } - output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} - -kernel void conv_add_1x1(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 1; - - uint input_arr_size = inTexture.get_array_size(); - uint weithTo = gid.z * kernelHXW * input_arr_size * 4; - - float4 output = float4(0.0); - - float4 input; - for (uint i = 0; i < input_arr_size; ++i) { - input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); - float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; - output.x += dot(input, weight_x); - - float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; - output.y += dot(input, weight_y); - - float4 
weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; - output.z += dot(input, weight_z); - - float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; - output.w += dot(input, weight_w); - } - output = output + biase[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - -kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device float *weights [[buffer(1)]], - const device float4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { - - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - uint output_slice = gid.z; - ushort2 stride = ushort2(param.strideX, param.strideY); - ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint kernelHXW = 9; - uint weithTo = gid.z * kernelHXW * 4; - float4 output = float4(0.0); - float4 inputs[9]; - inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); - inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); - inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); - inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); - inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); - inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); - inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); - inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); - inputs[8] 
= inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); - for (int j = 0; j < 9; ++j) { - float4 input = inputs[j]; - output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; - output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; - output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; - output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; - } - output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); - outTexture.write(output, gid.xy, gid.z); -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift index 92c43fe3218aa0c3ecfabd9a8d85c8107ecad273..345136a503d8eda6ad23f85ef01eb53fa539d453 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift @@ -14,38 +14,49 @@ import Foundation - public struct MetalConvParam { - let offsetX: Int16 - let offsetY: Int16 - let offsetZ: Int16 - let strideX: UInt16 - let strideY: UInt16 - let paddedZ: UInt16 + let offsetX: Int16 + let offsetY: Int16 + let offsetZ: Int16 + let strideX: UInt16 + let strideY: UInt16 + let dilationX: UInt16 + let dilationY: UInt16 } class ConvKernel: Kernel, Computable { - var metalParam: MetalConvParam! - required init(device: MTLDevice, param: ConvParam

) { - super.init(device: device, inFunctionName: "conv_add_1x1") - let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) - let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) - let offsetZ = 0.0 - param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvParam

) { + param.filter.initBuffer(device: device, precision: ComputePrecision.Float32) + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_1x1") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_3x3") + } else if param.filter.width == 3 && param.filter.height == 3 { + super.init(device: device, inFunctionName: "conv_3x3") + } else { + fatalError(" unsupport ") } + + let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) + let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) + let offsetZ = 0.0 - func compute(commandBuffer: MTLCommandBuffer, param: ConvParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } } + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..435776c850854f2fc4259e8a2089299da825f463 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift @@ -0,0 +1,83 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct MetalConvTransposeParam { + let kernelW: UInt16; + let kernelH: UInt16; + + let strideX: UInt16; + let strideY: UInt16; + + let paddingX: UInt16; + let paddingY: UInt16; + + let dilationX: UInt16; + let dilationY: UInt16; +} + +class ConvTransposeKernel: Kernel, Computable{ + var metalParam: MetalConvTransposeParam! + required init(device: MTLDevice, param: ConvTransposeParam

) { + param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) + param.filter.initBuffer(device: device, precision: computePrecision, convertToNHWC: false, withTranspose: true) + if computePrecision == .Float32 { + if param.stride == [2, 2] && param.stride == [2, 2] { + super.init(device: device, inFunctionName: "conv_transpose2x2_stride2") + } else { + fatalError(" -- conv transpose unsupported yet -- ") + } + } else if computePrecision == .Float16 { + if param.stride == [2, 2] && param.stride == [2, 2] { + super.init(device: device, inFunctionName: "conv_transpose2x2_stride2_half") + } else { + fatalError(" -- conv transpose unsupported yet -- ") + } + } else { + fatalError() + } + +// let filter: [Float32] = param.filter.buffer.array() +// print(" conv transpose filter") +// print(filter) + let kernelWidth = UInt16(param.filter.width) + let kernelHeight = UInt16(param.filter.height) + + let strideX = UInt16(param.stride[0]) + let strideY = UInt16(param.stride[1]) + let paddingX = UInt16(param.paddings[0]) + let paddingY = UInt16(param.paddings[1]) + let dilationX = UInt16(param.dilations[0]) + let dilationY = UInt16(param.dilations[1]) + + metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY) + + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } +} + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift index 361e77950841f2fa2b54884a2fbf394714f10902..16774a85492d2e21ca5575ed661674824319db28 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift @@ -14,13 +14,60 @@ import Foundation +struct ElementwiseAddMetalParam { + var fast: Int32 = 0 + var axis: Int32 = 0 + var ylen: Int32 = 0 + var xdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) + var xtrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) + var ydim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) + var ytrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) +} class ElementwiseAddKernel: Kernel, Computable { - required init(device: MTLDevice, param: ElementwiseAddParam

) { - super.init(device: device, inFunctionName: "elementwise_add") - } + var metalParam: ElementwiseAddMetalParam + required init(device: MTLDevice, param: ElementwiseAddParam

) { + param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) + + metalParam = ElementwiseAddMetalParam.init() + + let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) } + let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) } + let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) } + let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) } - func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam

) throws { - + metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3]) + metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3]) + metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3]) + metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3]) + if param.axis == -1 { + metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout()) + } else { + metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) + } + metalParam.ylen = Int32(param.inputY.tensorDim.cout()) + if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { + // print("===> elementwise_add fast!!!") + metalParam.fast = 1 + } + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "elementwise_add") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "elementwise_add_half") + } else { + fatalError() + } + } + + func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } + encoder.setTexture(param.inputX.metalTexture, index: 0) + encoder.setTexture(param.inputY.metalTexture, index: 1) + encoder.setTexture(param.output.metalTexture, index: 2) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddPreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddPreluKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..91589864b07f10754c860d038e754e09874db54e --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddPreluKernel.swift @@ -0,0 +1,79 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + + +class ElementwiseAddPreluKernel: Kernel, Computable { + var metalParam: ElementwiseAddMetalParam + required init(device: MTLDevice, param: ElementwiseAddPreluParam

) { + param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) + param.alpha.initBuffer(device: device, precision: computePrecision) + + metalParam = ElementwiseAddMetalParam.init() + + let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) } + let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) } + let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) } + let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) } + + metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3]) + metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3]) + metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3]) + metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3]) + if param.axis == -1 { + metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout()) + } else { + metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) + } + metalParam.ylen = Int32(param.inputY.tensorDim.cout()) + if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { + // print("===> elementwise_add fast!!!") + metalParam.fast = 1 + } + + if computePrecision == .Float32 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "elementwise_add_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "elementwise_add_element_float") + } else { + super.init(device: device, inFunctionName: "elementwise_add_prelu_float") + } + } else if computePrecision == .Float16 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "elementwise_add_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "elementwise_add_channel_half") + } else { + super.init(device: device, inFunctionName: "elementwise_add_channel_half") + } + } else { + fatalError() + } + } + + func compute(commandBuffer: MTLCommandBuffer, param: 
ElementwiseAddPreluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(param.inputX.metalTexture, index: 0) + encoder.setTexture(param.inputY.metalTexture, index: 1) + encoder.setTexture(param.output.metalTexture, index: 2) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.alpha.buffer, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/FlattenKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/FlattenKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..090c55b16160dca19bfcdc4f3467cacdbc9a20c2 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/FlattenKernel.swift @@ -0,0 +1,71 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct FlattenMetalParam { + var idim: (Int32, Int32, Int32, Int32) + var itrans: (Int32, Int32, Int32, Int32) + var odim: (Int32, Int32, Int32, Int32) + var otrans: (Int32, Int32, Int32, Int32) +} + + +class FlattenKernel: Kernel, Computable{ + + var metalParam: FlattenMetalParam + + required init(device: MTLDevice, param: FlattenParam

) { + param.output.initTexture(device: device, computePrecision: computePrecision) + var id: [Int32] = [1, 1, 1, 1] + for i in 0..) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift deleted file mode 100644 index 8f97d61e83fc71efca8a4d41705b3eb56d7dbdb3..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -import Metal -import Foundation - -public protocol TestParam { -} - -public protocol Testable { - associatedtype TestParamType: TestParam - func test(commandBuffer: MTLCommandBuffer, param: TestParamType) - init(device: MTLDevice, testParam: TestParamType) -} - - -protocol Computable { - associatedtype ParamType: OpParam - func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws - init(device: MTLDevice, param: ParamType) -} - -protocol KernelProtocol { - var pipline: MTLComputePipelineState { get set } - var functionName: String { get set } - -} - -open class Kernel { - let pipline: MTLComputePipelineState - let functionName: String - public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) { - pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib) - functionName = inFunctionName - } -} - -open class CusomKernel: Kernel { - public struct Shape { - public let width: Int - public let height: Int - public let channel: Int - public init(inWidth: Int, inHeight: Int, inChannel: Int){ - width = inWidth - height = inHeight - channel = inChannel - } - } - let outputTexture: MTLTexture - public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) { - let textureDesc = MTLTextureDescriptor.init() - textureDesc.textureType = .type2D - textureDesc.width = outputDim.width - textureDesc.height = outputDim.height - textureDesc.depth = (outputDim.channel + 3) / 4 - textureDesc.pixelFormat = .rgba32Float - textureDesc.usage = [.shaderRead, .shaderWrite] - textureDesc.storageMode = .shared - outputTexture = device.makeTexture(descriptor: textureDesc) ?! 
" make texture error " - - super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib) - } - - func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(inputTexuture, index: 0) - encoder.setTexture(outputTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: outputTexture) - encoder.endEncoding() - } - -} - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal deleted file mode 100644 index 92ee1184520d7b1df2577c1fc52cc3257de7be79..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal +++ /dev/null @@ -1,252 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include -using namespace metal; - -struct OutputDim { - ushort width; - ushort height; - ushort strideX; - ushort strideY; -}; - -kernel void resize(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant OutputDim ¶ms [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const uint2 pos = gid.xy * uint2(params.strideX, params.strideY); - const half4 input = inTexture.read(pos); - outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z); -} - -kernel void relu(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const half4 input = inTexture.read(gid.xy, gid.z); - const float4 relu = fmax((float4)input, 0.0); - outTexture.write(half4(relu), gid.xy, gid.z); -} - -kernel void elementwise_add(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half4 *biasTerms [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const half4 input = inTexture.read(gid.xy, gid.z); - outTexture.write(input, gid.xy, gid.z); -} - -kernel void batchnorm(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half4 * newScale [[buffer(0)]], - const device half4 * newBias [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= 
outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - const half4 input = inTexture.read(gid.xy, gid.z); - half4 output = input * newScale[gid.z] + newBias[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} - -//kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], -// texture2d_array outTexture [[texture(1)]], -// uint3 gid [[thread_position_in_grid]]) { -// if (gid.x >= inTexture.get_width() || -// gid.y >= inTexture.get_height()){ -// return; -// } -// const half4 input = inTexture.read(gid.xy); -// outTexture.write(input, gid.xy, 0); -//} - -kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height()){ - return; - } - const float4 input = inTexture.read(gid.xy); - outTexture.write(input, gid.xy, 0); -} - - -kernel void texture2d_to_2d_array_half(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= inTexture.get_width() || - gid.y >= inTexture.get_height()){ - return; - } - const half4 input = inTexture.read(gid.xy); - outTexture.write(input, gid.xy, 0); -} - -struct PoolParam { - int ksizeX; - int ksizeY; - int strideX; - int strideY; - int paddingX; - int paddingY; - int poolType; -}; - -kernel void pool(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant PoolParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int xmin = gid.x * pm.strideX - pm.paddingX; - int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width())); - xmin = max(xmin, 0); - int ymin = gid.y * pm.strideX - pm.paddingX; - int ymax = min(ymin + pm.ksizeX, 
int(inTexture.get_height())); - ymin = max(ymin, 0); - - float4 r = 0; - if (pm.poolType == 0) { - r = inTexture.read(uint2(xmin, ymin), gid.z); - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) { - r = fmax(r, inTexture.read(uint2(x, y), gid.z)); - } - } - } else if (pm.poolType == 1) { - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) { - r += inTexture.read(uint2(x, y), gid.z); - } - } - r /= pm.ksizeX * pm.ksizeY; - } - outTexture.write(r, gid.xy, gid.z); -} - - -kernel void pool_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant PoolParam &pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int xmin = gid.x * pm.strideX - pm.paddingX; - int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width())); - xmin = max(xmin, 0); - int ymin = gid.y * pm.strideX - pm.paddingX; - int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height())); - ymin = max(ymin, 0); - - half4 r = 0; - if (pm.poolType == 0) { - r = inTexture.read(uint2(xmin, ymin), gid.z); - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) { - r = fmax(r, inTexture.read(uint2(x, y), gid.z)); - } - } - } else if (pm.poolType == 1) { - for (int x = xmin; x < xmax; x++) { - for (int y = ymin; y < ymax; y++) { - r += inTexture.read(uint2(x, y), gid.z); - } - } - r /= pm.ksizeX * pm.ksizeY; - } - outTexture.write(r, gid.xy, gid.z); -} - -kernel void reshape(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - - float4 r = inTexture.read(uint2(0, 0), gid.z); - outTexture.write(r, gid.xy, gid.z); -} - -kernel void reshape_half(texture2d_array inTexture 
[[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - - half4 r = inTexture.read(uint2(0, 0), gid.z); - outTexture.write(r, gid.xy, gid.z); -} - -kernel void softmax(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int zsize = inTexture.get_array_size(); - float maxv = inTexture.read(uint2(0, 0), 0)[0]; - for (int z = 0; z < zsize; z++) { - float4 r = inTexture.read(uint2(0, 0), z); - maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3]))); - } - float sum = 0; - for (int z = 0; z < zsize; z++) { - float4 r = inTexture.read(uint2(0, 0), z); - sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv); - } - float4 rr = inTexture.read(gid.xy, gid.z); - rr = exp(rr - maxv) / sum; - outTexture.write(rr, gid.xy, gid.z); -} - - -kernel void softmax_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int zsize = inTexture.get_array_size(); - half maxv = inTexture.read(uint2(0, 0), 0)[0]; - for (int z = 0; z < zsize; z++) { - half4 r = inTexture.read(uint2(0, 0), z); - maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3]))); - } - float sum = 0; - for (int z = 0; z < zsize; z++) { - half4 r = inTexture.read(uint2(0, 0), z); - sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv); - } - half4 rr = inTexture.read(gid.xy, gid.z); - rr = exp(rr - maxv) / sum; - outTexture.write(rr, gid.xy, gid.z); -} diff --git 
a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..3f78efb89e47197ae0af6a1bb53955bc4a937eda --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift @@ -0,0 +1,55 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class MulticlassNMSKernel: Kernel, Computable{ + let pipline1: MTLComputePipelineState + + required init(device: MTLDevice, param: MulticlassNMSParam

) { + + param.middleOutput.initBuffer(device: device) + param.bboxOutput.initBuffer(device: device) + if computePrecision == .Float32 { + pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", inPaddleMobileLib: true) + super.init(device: device, inFunctionName: "nms_fetch_result") + } else if computePrecision == .Float16 { + pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", inPaddleMobileLib: true) + super.init(device: device, inFunctionName: "nms_fetch_result_half") + } else { + fatalError( " unsupport precision " ) + } + + } + + func compute(commandBuffer: MTLCommandBuffer, param: MulticlassNMSParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.scores.metalTexture, index: 0) + encoder.setBuffer(param.middleOutput.resultBuffer!, offset: 0, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.scores.metalTexture) + encoder.endEncoding() + + guard let encoderBox = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoderBox.setTexture(param.bboxes.metalTexture, index: 0) + encoderBox.setBuffer(param.bboxOutput.resultBuffer!, offset: 0, index: 0) + encoderBox.dispatch(computePipline: pipline1, outTexture: param.bboxes.metalTexture) + encoderBox.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift index 983a3acb9943f2e549b07d095c7dd4a23c1e96d9..1d66e420e236f2e0a7734838a293215807caa968 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift @@ -15,46 +15,57 @@ import Foundation struct PoolMetalParam { - let ksizeX: Int32 - let ksizeY: Int32 - let strideX: Int32 - let strideY: Int32 - let paddingX: Int32 - let paddingY: Int32 - let poolType: Int32 + let ksizeX: Int32 + let ksizeY: Int32 + let strideX: Int32 + let strideY: Int32 + let paddingX: Int32 + let paddingY: Int32 + let poolType: Int32 } class PoolKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: PoolParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - var poolType: Int32 - switch param.poolType { - case "max": - poolType = 0 - case "avg": - poolType = 1 - default: - throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType) - } - var pmp = PoolMetalParam.init( - ksizeX: param.ksize[0], - ksizeY: param.ksize[1], - strideX: param.stride[0], - strideY: param.stride[1], - paddingX: param.padding[0], - paddingY: param.padding[1], - poolType: poolType - ) - encoder.setBytes(&pmp, length: MemoryLayout.size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + var metalParam: PoolMetalParam + required init(device: MTLDevice, param: PoolParam

) { + param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) + + var poolType: Int32 + switch param.poolType { + case "max": + poolType = 0 + case "avg": + poolType = 1 + default: + fatalError() } + metalParam = PoolMetalParam.init( + ksizeX: param.ksize[0], + ksizeY: param.ksize[1], + strideX: param.stride[0], + strideY: param.stride[1], + paddingX: param.padding[0], + paddingY: param.padding[1], + poolType: poolType + ) - required init(device: MTLDevice, param: PoolParam

) { - super.init(device: device, inFunctionName: "pool") + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "pool") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "pool_half") + } else { + fatalError() + } + } + + func compute(commandBuffer: MTLCommandBuffer, param: PoolParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..4ee25888f06048bfe696028ea2338a56fd06053e --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class PreluKernel: Kernel, Computable{ + required init(device: MTLDevice, param: PreluParam

) { + param.alpha.initBuffer(device: device, precision: computePrecision) + param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) + if computePrecision == .Float32 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "prelu_channel") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "prelu_element") + } else { + super.init(device: device, inFunctionName: "prelu_other") + } + } else if computePrecision == .Float16 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "prelu_element_half") + } else { + super.init(device: device, inFunctionName: "prelu_other_half") + } + } else { + fatalError() + } + } + + func compute(commandBuffer: MTLCommandBuffer, param: PreluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBuffer(param.alpha.buffer, offset: 0, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..be18c4411ffbef704dff61bb2aa82bc338daf163 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift @@ -0,0 +1,151 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct PriorBoxMetalParam { + let offset: Float32 + let stepWidth: Float32 + let stepHeight: Float32 + let minSize: Float32 + let maxSize: Float32 + let imageWidth: Float32 + let imageHeight: Float32 + let clip: Bool + let numPriors: uint + let aspecRatiosSize: uint + let minSizeSize: uint + let maxSizeSize: uint +} + +class PriorBoxKernel: Kernel, Computable{ + var metalParam: PriorBoxMetalParam! + + required init(device: MTLDevice, param: PriorBoxParam

) { + + let originDim = param.output.tensorDim; + + param.output.tensorDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]]) + param.output.padToFourDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]]) + + param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: computePrecision) + param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision) + + + if computePrecision == .Float32 { + if param.min_max_aspect_ratios_order { + super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder") + } else { + super.init(device: device, inFunctionName: "prior_box") + } + + } else if computePrecision == .Float16 { + if param.min_max_aspect_ratios_order { + super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half") + } else { + super.init(device: device, inFunctionName: "prior_box_half") + } + } else { + fatalError() + } + + + guard param.minSizes.count == 1 else { + fatalError(" need implement ") + } + +// let n = 1 +// let h = param.output.dim[1] +// let w = param.output.dim[2] +// let c = param.output.dim[3] * param.output.dim[0] +// +// param.output.dim = Dim.init(inDim: [n, h, w, c]) +// param.output.transpose = [0, 1, 2, 3] + + let imageWidth = Float32(param.inputImage.padToFourDim[3]) + let imageHeight = Float32(param.inputImage.padToFourDim[2]) + + let featureWidth = param.input.padToFourDim[3] + let featureHeight = param.input.padToFourDim[2] + + if param.stepW == 0 || param.stepH == 0 { + param.stepW = Float32(imageWidth) / Float32(featureWidth) + param.stepH = Float32(imageHeight) / Float32(featureHeight) + } + + var outputAspectRatior: [Float32] = [] + outputAspectRatior.append(1.0) + + let epsilon = 1e-6 + for ar in param.aspectRatios { + var alreadyExist = false + for outputAr in outputAspectRatior { + if fabs(Double(ar) - Double(outputAr)) < Double(epsilon) { + alreadyExist = true + 
break + } + } + + if !alreadyExist { + outputAspectRatior.append(ar) + } + if param.flip { + outputAspectRatior.append(1.0 / ar) + } + } + + if computePrecision == .Float16 { + let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout.size) + float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count) + param.newAspectRatios = buffer + + } else if computePrecision == .Float32 { + let buffer = device.makeBuffer(bytes: outputAspectRatior, length: outputAspectRatior.count * MemoryLayout.size, options: []) + param.newAspectRatios = buffer + } else { + fatalError() + } + + let aspectRatiosSize = uint(outputAspectRatior.count) + + let maxSizeSize: uint = uint(param.maxSizes.count) + let minSizeSize: uint = uint(param.minSizes.count) + + let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize + + let minSize = param.minSizes.last ?? 0.0 + let maxSize = param.maxSizes.last ?? 0.0 + + metalParam = PriorBoxMetalParam.init(offset: param.offset, stepWidth: param.stepW, stepHeight: param.stepH, minSize: minSize, maxSize: maxSize, imageWidth: imageWidth, imageHeight: imageHeight, clip: param.clip, numPriors: numPriors, aspecRatiosSize: aspectRatiosSize, minSizeSize: minSizeSize, maxSizeSize: maxSizeSize) + + } + + func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setTexture(param.outputVariances.metalTexture, index: 2) + + encoder.setBuffer(param.newAspectRatios!, offset: 0, index: 0) + + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 1) + + encoder.setBytes(param.variances, length: MemoryLayout.size * param.variances.count, index: 2) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift index 3c669cf4d965f7842070c4d38427f6d1d7440db5..18f279e9f3c5226d6eea5b5e6f0a42502173071e 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift @@ -15,17 +15,23 @@ import Foundation class ReluKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: ReluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + func compute(commandBuffer: MTLCommandBuffer, param: ReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } - - required init(device: MTLDevice, param: ReluParam

) { - super.init(device: device, inFunctionName: "relu") + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: ReluParam

) { + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "relu") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "relu_half") + } else { + fatalError() } + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift index 438c89e59eb7e9a2ef315997b9d8d1f3a44a5462..4114d3c3c62054235cd57fe37fe9cd83c5bb58cb 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift @@ -14,18 +14,84 @@ import Foundation +struct ReshapeMetalParam { + var idim: (Int32, Int32, Int32, Int32) + var itrans: (Int32, Int32, Int32, Int32) + var odim: (Int32, Int32, Int32, Int32) + var otrans: (Int32, Int32, Int32, Int32) +} + +struct ReshapeTestParam: TestParam { + let inputTexture: MTLTexture + let outputTexture: MTLTexture + let param: ReshapeMetalParam +} + class ReshapeKernel: Kernel, Computable{ - required init(device: MTLDevice, param: ReshapeParam

) { - super.init(device: device, inFunctionName: "reshape") + + var metalParam: ReshapeMetalParam + + required init(device: MTLDevice, param: ReshapeParam

) { + param.output.initTexture(device: device, computePrecision: computePrecision) + var id: [Int32] = [1, 1, 1, 1] + for i in 0..) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + let it: [Int32] = param.input.transpose.map { Int32($0) } + var od: [Int32] = [1, 1, 1, 1] + for i in 0..) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + +// func test(commandBuffer: MTLCommandBuffer, testParam: ReshapeTestParam) { +// guard let encoder = commandBuffer.makeComputeCommandEncoder() else { +// fatalError() +// } +// encoder.setTexture(testParam.inputTexture, index: 0) +// encoder.setTexture(testParam.outputTexture, index: 1) +// var pm: ReshapeMetalParam = testParam.param +// encoder.setBytes(&pm, length: MemoryLayout.size, index: 0) +// encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture) +// encoder.endEncoding() +// } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeBilinearKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeBilinearKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..e5cbce1d1e196f88bb7a3b38d3e92c330774f3ba --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeBilinearKernel.swift @@ -0,0 +1,49 @@ +/* Copyright (c) 
2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct ResizeBilinearMetalParam { + var ratio_h: Float32 + var ratio_w: Float32 +} + +class ResizeBilinearKernel: Kernel, Computable{ + func compute(commandBuffer: MTLCommandBuffer, param: ResizeBilinearParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + let ratio_h: Float32 = Float32(param.input.tensorDim.dims[2]) / Float32(param.output.tensorDim.dims[2]) + let ratio_w: Float32 = Float32(param.input.tensorDim.dims[3]) / Float32(param.output.tensorDim.dims[3]) + var p = ResizeBilinearMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w) + encoder.setBytes(&p, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: ResizeBilinearParam

) { + param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "resize_bilinear") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "resize_bilinear_half") + } else { + fatalError() + } + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift deleted file mode 100644 index d2795111ad1f43c759b95aa52ed34085a4ac147a..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ -// -//import Foundation -//import MetalPerformanceShaders -// -// -//struct ResizeParam: OpParam{ -// typealias OutputType = <#type#> -// -// typealias ParamPrecisionType = <#type#> -// -// let input: MTLTexture -// let output: MTLTexture -// let expectDim: Dim -//} -// -//struct OutputDim { -// let width: UInt16 -// let height: UInt16 -// let strideX: UInt16 -// let strideY: UInt16 -//} -// -//class ResizeKernel: Kernel, Computable{ -// var lanczos: MPSImageLanczosScale -// required init(device: MTLDevice, param: ResizeParam) { -// lanczos = MPSImageLanczosScale.init(device: device) -// super.init(device: device, inFunctionName: "resize") -// } -// func compute(commandBuffer: MTLCommandBuffer, param: ResizeParam) throws { -//// guard let encoder = commandBuffer.makeComputeCommandEncoder() else { -//// throw PaddleMobileError.predictError(message: " encode is nil") -//// } -// lanczos.encode(commandBuffer: commandBuffer, sourceTexture: param.input, destinationTexture: param.output) -// -//// encoder.setTexture(param.input, index: 0) -//// encoder.setTexture(param.output, index: 1) -//// let strideX = param.input.width/param.expectDim[2] -//// let strideY = param.input.height/param.expectDim[1] -//// var outputDim = OutputDim.init(width: UInt16(param.expectDim[1]), height: UInt16(param.expectDim[2]), strideX: UInt16(strideX), strideY: UInt16(strideY)) -//// encoder.setBytes(&outputDim, length: MemoryLayout.size, index: 0) -//// encoder.dispatch(computePipline: pipline, outTexture: param.output) -//// encoder.endEncoding() -// } -// -// -// -// -//} - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..feb052a44fdc7c6134cc90f07f3fc94ad0a497df --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift @@ -0,0 +1,41 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct ShapeMetalParam { +} + +class ShapeKernel: Kernel, Computable{ + func compute(commandBuffer: MTLCommandBuffer, param: ShapeParam

) throws { +// print("shape compute") +// guard let encoder = commandBuffer.makeComputeCommandEncoder() else { +// throw PaddleMobileError.predictError(message: " encode is nil") +// } +// encoder.setTexture(param.output.metalTexture, index: 0) +// encoder.endEncoding() + } + + required init(device: MTLDevice, param: ShapeParam

) { + param.output.initTexture(device: device, computePrecision: computePrecision) + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "shape") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "shape_half") + } else { + fatalError() + } + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift index b94f0286f43ec482353ff278c6c104da77f47315..5d6874da151b64fd58c2016865515778d6267551 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift @@ -14,19 +14,38 @@ import Foundation +struct SoftmaxMetalParam { + let N: Int32 + let K: Int32 +} + class SoftmaxKernel: Kernel, Computable{ - - func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + + var metalParam: SoftmaxMetalParam + required init(device: MTLDevice, param: SoftmaxParam

) { + param.output.initTexture(device: device, computePrecision: computePrecision) + metalParam = SoftmaxMetalParam.init( + N: Int32(param.input.tensorDim[0]), + K: Int32(param.input.tensorDim[1]) + ) + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "softmax_float") + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "softmax_half") + } else { + fatalError() } - - required init(device: MTLDevice, param: SoftmaxParam

) { - super.init(device: device, inFunctionName: "softmax") + } + + func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SplitKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SplitKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..67e1cd9ab85c3c60d89846bab89ef10bbe513305 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SplitKernel.swift @@ -0,0 +1,93 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct SplitMetalParam { + var idim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1) + var axis: Int32 = 0 + var offset: Int32 = 0 + var trans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) + var vdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) +} + +class SplitKernel: Kernel, Computable{ + var smp: SplitMetalParam + func compute(commandBuffer: MTLCommandBuffer, param: SplitParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(param.input.metalTexture, index: 0) + for i in 0...size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: SplitParam

) { + // param.output.initTexture(device: device, computePrecision: computePrecision) + let num = param.outputList.count + let rank = param.input.tensorDim.cout() + assert(num >= 2 && num <= 4) + for output in param.outputList { + output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) + } + smp = SplitMetalParam.init() + smp.idim = (Int32(param.input.dim[0]), Int32(param.input.dim[1]), Int32(param.input.dim[2]), Int32(param.input.dim[3])) + smp.axis = Int32(param.axis + param.input.dim.cout() - param.input.tensorDim.cout()) + for i in 0..<4 { + if param.input.transpose[i] == smp.axis { + smp.axis = Int32(i) + break + } + } + smp.trans = (Int32(param.input.transpose[0]), Int32(param.input.transpose[1]), Int32(param.input.transpose[2]), Int32(param.input.transpose[3])) + var vdim: [Int32] = [0, 0, 0, 0] + for i in 0..: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: FeedParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(param.input.mtlTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture) - encoder.endEncoding() + func compute(commandBuffer: MTLCommandBuffer, param: FeedParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } - - required init(device: MTLDevice, param: FeedParam

) { - super.init(device: device, inFunctionName: "texture2d_to_2d_array") + encoder.setTexture(param.input.mtlTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: FeedParam

) { + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "texture2d_to_2d_array_half") + } else if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "texture2d_to_2d_array") + } else { + fatalError() } + + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..7b872283d45bca4adb5e90a531c936f2ad5534f8 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift @@ -0,0 +1,79 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct TransposeMetalParam { + var iC: Int32 = 0 + var oC: Int32 = 0 + var axis: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) +} + +class TransposeKernel: Kernel, Computable { + var metalParam: TransposeMetalParam = TransposeMetalParam.init() + required init(device: MTLDevice, param: TransposeParam

) { + param.output.initTexture(device: device, computePrecision: computePrecision) + let rank = param.input.tensorDim.cout() + var axis: [Int] = [0, 1, 2, 3] + for i in 0..", kernelFunc) + print(metalParam) + super.init(device: device, inFunctionName: kernelFunc) + } + + func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..96333a07a9669ecb2b5bfe901d71be729e37b533 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal @@ -0,0 +1,42 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include +using namespace metal; + +kernel void batchnorm(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + const device float4 * nscale [[buffer(0)]], + const device float4 * nbias [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + const float4 input = inTexture.read(gid.xy, gid.z); + float4 output = input * nscale[gid.z] + nbias[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + +kernel void batchnorm_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + const device half4 * newScale [[buffer(0)]], + const device half4 * newBias [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + const half4 input = inTexture.read(gid.xy, gid.z); + half4 output = input * newScale[gid.z] + newBias[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal new file mode 100644 index 0000000000000000000000000000000000000000..eb94408c8ac664be5cf62bc28bfb02825856ebd4 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal @@ -0,0 +1,36 @@ +// +// BatchNormRelu.metal +// paddle-mobile +// + +#include +using namespace metal; + +struct MetalConvParam { + short offsetX; + short offsetY; + short offsetZ; + ushort strideX; + ushort strideY; +}; + +kernel void batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + const device float4 *new_scale [[buffer(0)]], + const device float4 *new_biase [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= 
outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + float4 input; + float4 output; + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + input = inTexture.sample(sample, gid.x, gid.y, gid.z); + output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal new file mode 100644 index 0000000000000000000000000000000000000000..a590f8089890f2fab1af4c1f736f3bfc5708aecf --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal @@ -0,0 +1,49 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#ifdef P + +#define CONCAT2(a, b) a ## b +#define CONCAT2_(a, b) a ## _ ## b + +#define FUNC(f, p) CONCAT2_(f, p) +#define VECTOR(p, n) CONCAT2(p, n) + +kernel void FUNC(bilinear_interp, P)(texture2d_array input [[texture(0)]], + texture2d_array output [[texture(1)]], + constant bilinear_interp_param & pm [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + VECTOR(P, 4) r; + if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) { + r = input.read(gid.xy, gid.z); + } else { + P w = gid.x * pm.ratio_w; + P h = gid.y * pm.ratio_h; + uint w0 = w, h0 = h; + uint w1 = w0 + 1, h1 = h0 + 1; + P w1lambda = w - w0, h1lambda = h - h0; + P w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda; + if (w1 >= input.get_width()) w1 = w0; + if (h1 >= input.get_height()) h1 = h0; + VECTOR(P, 4) r0 = input.read(uint2(w0, h0), gid.z); + VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z); + VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z); + VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z); + r = h2lambda * (w2lambda * r0 + w1lambda * r1) + + h1lambda * (w2lambda * r2 + w1lambda * r3); + } + output.write(r, gid.xy, gid.z); +} + +#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal new file mode 100644 index 0000000000000000000000000000000000000000..394cf89db09d47b0d3c87ff124c21a93962c0972 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal @@ -0,0 +1,29 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + +struct bilinear_interp_param { + float ratio_h; + float ratio_w; +}; + +#define P float +#include "BilinearInterp.inc.metal" +#undef P + +#define P half +#include "BilinearInterp.inc.metal" +#undef P diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.inc.metal new file mode 100644 index 0000000000000000000000000000000000000000..918fbac1a713d7b0442a1eb1f07abea3616bec96 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.inc.metal @@ -0,0 +1,54 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#ifdef P + +#define CONCAT2(a, b) a ## b +#define CONCAT2_(a, b) a ## _ ## b + +#define FUNC(f, p) CONCAT2_(f, p) +#define VECTOR(p, n) CONCAT2(p, n) +kernel void FUNC(boxcoder, P)(texture2d_array priorBox [[texture(0)]], + texture2d_array priorBoxVar [[texture(1)]], + texture2d_array targetBox [[texture(2)]], + texture2d_array output[[texture(3)]], + uint3 gid [[thread_position_in_grid]]) { + VECTOR(P, 4) p = priorBox.read(uint2(0, gid.x), gid.z); + VECTOR(P, 4) pv = priorBoxVar.read(uint2(0, gid.x), gid.z); + VECTOR(P, 4) t; + t[0] = targetBox.read(uint2(0, gid.x), gid.z)[0]; + t[1] = targetBox.read(uint2(1, gid.x), gid.z)[0]; + t[2] = targetBox.read(uint2(2, gid.x), gid.z)[0]; + t[3] = targetBox.read(uint2(3, gid.x), gid.z)[0]; + + P px = (p.x + p.z) / 2; + P py = (p.y + p.w) / 2; + P pw = p.z - p.x; + P ph = p.w - p.y; + + P tx = pv.x * t.x * pw + px; + P ty = pv.y * t.y * ph + py; + P tw = exp(pv.z * t.z) * pw; + P th = exp(pv.w * t.w) * ph; + + VECTOR(P, 4) r; + r.x = tx - tw / 2; + r.y = ty - th / 2; + r.z = tx + tw / 2; + r.w = ty + th / 2; + + output.write(r, gid.xy, gid.z); +} + +#endif diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.metal new file mode 100644 index 0000000000000000000000000000000000000000..4009e213d51d0a9c33c70aea22b015df49e347dc --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BoxCoder.metal @@ -0,0 +1,23 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + +#define P float +#include "BoxCoder.inc.metal" +#undef P +#define P half +#include "BoxCoder.inc.metal" +#undef P diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal new file mode 100644 index 0000000000000000000000000000000000000000..40bae035c097b5ab386d78520b6b04f074eb2fee --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal @@ -0,0 +1,120 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include +using namespace metal; + + +inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) { + abcd[0] = abcd[1] = abcd[2] = 0; + abcd[3] = xyzn[0] * 4 + xyzn[3]; +} +inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) { + abcd[0] = abcd[1] = 0; + abcd[2] = xyzn[1]; + abcd[3] = xyzn[0] * 4 + xyzn[3]; +} +inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) { + abcd[0] = 0; + abcd[3] = xyzn[0]; + abcd[2] = xyzn[1]; + abcd[1] = xyzn[2] * 4 + xyzn[3]; +} +inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) { + abcd[2] = xyzn[0]; + abcd[1] = xyzn[1]; + uint t = xyzn[2] * 4 + xyzn[3]; + abcd[0] = t / C; + abcd[3] = t % C; +} + +inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) { + xyzn[1] = xyzn[2] = 0; + xyzn[0] = abcd[3] / 4; + xyzn[1] = abcd[3] % 4; +} +inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) { + xyzn[2] = 0; + xyzn[1] = abcd[2]; + xyzn[0] = abcd[3] / 4; + xyzn[3] = abcd[3] % 4; +} +inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) { + xyzn[0] = abcd[3]; + xyzn[1] = abcd[2]; + xyzn[2] = abcd[1] / 4; + xyzn[3] = abcd[1] % 4; +} +inline void abcd2xyzn_4(int C, int abcd[4], int xyzn[4]) { + xyzn[0] = abcd[2]; + xyzn[1] = abcd[1]; + uint t = abcd[0] * C + abcd[3]; + xyzn[2] = t / 4; + xyzn[3] = t % 4; +} + +inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) { + abcd[2] = xyzn[0]; + abcd[1] = xyzn[1]; + uint t = xyzn[2] * 4 + xyzn[3]; + abcd[0] = t / C; + abcd[3] = t % C; +} + +inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) { + xyzn[0] = abcd[2]; + xyzn[1] = abcd[1]; + uint t = abcd[0] * C + abcd[3]; + xyzn[2] = t / 4; + xyzn[3] = t % 4; +} + +inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) { + int32_t r = abcd[0]; + r = r * dim[1] + abcd[1]; + r = r * dim[2] + abcd[2]; + r = r * dim[3] + abcd[3]; + return r; +} + +inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) { + abcd[3] = ind % dim[3]; ind /= dim[3]; + abcd[2] = ind % dim[2]; ind /= dim[2]; + abcd[1] = ind % dim[1]; ind /= dim[1]; + abcd[0] = ind; +} + +inline 
void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
+  // Gather: output axis i takes the input entry selected by trans[i].
+  opos[0] = ipos[trans[0]];
+  opos[1] = ipos[trans[1]];
+  opos[2] = ipos[trans[2]];
+  opos[3] = ipos[trans[3]];
+}
+
+// Scatter counterpart of trans(): input entry i is stored at output axis
+// trans[i]. Undoes trans() when trans[] is a permutation of 0..3.
+inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
+  opos[trans[0]] = ipos[0];
+  opos[trans[1]] = ipos[1];
+  opos[trans[2]] = ipos[2];
+  opos[trans[3]] = ipos[3];
+}
+
+
+// Convolution parameters passed to the conv kernels in buffer(0).
+struct MetalConvParam {
+  short offsetX;     // added to gid.x * strideX to get the input x position
+  short offsetY;     // added to gid.y * strideY to get the input y position
+  short offsetZ;     // NOTE(review): not read by the kernels visible here
+  ushort strideX;
+  ushort strideY;
+  ushort dilationX;  // x distance between neighbouring filter taps
+  ushort dilationY;  // y distance between neighbouring filter taps
+};
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
new file mode 100644
index 0000000000000000000000000000000000000000..2b070fc48b78391e96b93823eeff7f936de2ff7d
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
@@ -0,0 +1,318 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
*/
+
+// Template body for the concat kernels. The including file defines
+//   P - element type (float or half)
+//   R - tensor rank, N - number of inputs (2..6)
+//   V - variant: VNORMAL (generic per-component loop) or VX / VY / VZ
+//       (fast path, whole texels copied along one texture axis)
+// and each inclusion emits one kernel named concat_<R>_<N>_<variant>_<P>.
+#ifdef P
+
+#define CONCAT2(a, b) a ## b
+#define CONCAT2_(a, b) a ## _ ## b
+#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
+#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
+#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
+
+// Extra macro level so that R, N, VV and P are expanded before pasting.
+#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
+#define VECTOR(p, n) CONCAT2(p, n)
+#define FUNC_R(f, r) CONCAT2_(f, r)
+
+#if V == VX
+#define VV x
+#elif V == VY
+#define VV y
+#elif V == VZ
+#define VV z
+#else
+#define VV normal
+#endif
+
+#if V == VNORMAL
+//kernel void FUNC(concat, R, N, normal, P)(array<texture2d_array<P, access::read>, N> in [[texture(0)]],
+//                                          texture2d_array<P, access::read> out_x [[texture(N)]],
+//                                          texture2d_array<P, access::write> out [[texture(N+1)]],
+//                                          constant ConcatParam & pm [[buffer(0)]],
+//                                          uint3 gid [[thread_position_in_grid]]) {
+//}
+
+// Generic variant. For each of the four components of the output texel the
+// texture coordinate is mapped to a tensor coordinate, the input owning that
+// position along pm.axis is located, and the matching component is read from
+// it. Components that belong to no input keep the value read from inx.
+kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
+                                      texture2d_array<P, access::read> in1 [[texture(1)]],
+#if N >= 3
+                                      texture2d_array<P, access::read> in2 [[texture(2)]],
+#endif
+#if N >= 4
+                                      texture2d_array<P, access::read> in3 [[texture(3)]],
+#endif
+#if N >= 5
+                                      texture2d_array<P, access::read> in4 [[texture(4)]],
+#endif
+#if N >= 6
+                                      texture2d_array<P, access::read> in5 [[texture(5)]],
+#endif
+                                      texture2d_array<P, access::read> inx [[texture(N)]],
+                                      texture2d_array<P, access::write> out [[texture(N+1)]],
+                                      constant ConcatParam & pm [[buffer(0)]],
+                                      uint3 gid [[thread_position_in_grid]]) {
+
+  // Private copy: odim[axis] is patched temporarily inside the loop.
+  ConcatParam cp = pm;
+  int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4];
+  VECTOR(P, 4) r = inx.read(gid.xy, gid.z);
+  for (int i = 0; i < 4; i++) {
+    xyzn[3] = i;
+#if R == 4
+    xyzn2abcd_4(cp.odim[3], xyzn, abcd);
+#else
+    FUNC_R(xyzn2abcd, R)(xyzn, abcd);
+#endif
+    // k: position along the concat axis relative to the first input.
+    int k = abcd[cp.axis] - cp.offset;
+    if (k < 0) continue;
+    // Find the input j containing k; afterwards k is local to that input.
+    int j = 0;
+    for (; j < N; j++) {
+      if (k < cp.vdim[j]) {
+        break;
+      }
+      k -= cp.vdim[j];
+    }
+    if (j == N) {
+      continue;
+    }
+    // Map back to texture coordinates using the extent of input j along the
+    // concat axis, then restore the output extent.
+    int ta = cp.odim[cp.axis];
+    abcd[cp.axis] = k;
+    cp.odim[cp.axis] = cp.vdim[j];
+#if R == 4
+    abcd2xyzn_4(cp.odim[3], abcd, oxyzn);
+#else
+    FUNC_R(abcd2xyzn, R)(abcd, oxyzn);
+#endif
+    cp.odim[cp.axis] = ta;
+    switch (j) {
+      case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
+      case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
+#if N >= 3
+      case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
+#endif
+#if N >= 4
+      case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
+#endif
+#if N >= 5
+      case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
+#endif
+#if N >= 6
+      case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
+#endif
+    }
+  }
+  out.write(r, gid.xy, gid.z);
+}
+
+#endif // V == NORMAL
+
+
+
+#if V == VX
+// Fast path along texture x: the inputs are laid side by side in x starting
+// at pm.offset; pm.vdim[j] is the x extent of input j. Threads left of the
+// offset or right of the last input write nothing.
+kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
+                                      texture2d_array<P, access::read> in1 [[texture(1)]],
+#if N >= 3
+                                      texture2d_array<P, access::read> in2 [[texture(2)]],
+#endif // N >= 3
+#if N >= 4
+                                      texture2d_array<P, access::read> in3 [[texture(3)]],
+#endif // N >= 4
+#if N >= 5
+                                      texture2d_array<P, access::read> in4 [[texture(4)]],
+#endif // N >= 5
+#if N >= 6
+                                      texture2d_array<P, access::read> in5 [[texture(5)]],
+#endif // N >= 6
+                                      texture2d_array<P, access::write> out [[texture(N)]],
+                                      constant ConcatParam & pm [[buffer(0)]],
+                                      uint3 gid [[thread_position_in_grid]]) {
+  int x = gid.x - pm.offset;
+  if (x < 0) return;
+  if (x < pm.vdim[0]) {
+    // Read at the offset-adjusted x like the other inputs (was gid.xy, which
+    // is only the same position when pm.offset == 0).
+    VECTOR(P, 4) r = in0.read(uint2(x, gid.y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+  x -= pm.vdim[0];
+  if (x < pm.vdim[1]) {
+    VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#if N >= 3
+  x -= pm.vdim[1];
+  if (x < pm.vdim[2]) {
+    VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 3
+#if N >= 4
+  x -= pm.vdim[2];
+  if (x < pm.vdim[3]) {
+    VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 4
+#if N >= 5
+  x -= pm.vdim[3];
+  if (x < pm.vdim[4]) {
+    VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 5
+#if N >= 6
+  x -= pm.vdim[4];
+  if (x < pm.vdim[5]) {
+    VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 6
+}
+#endif // V == VX
+
+#if V == VY
+// Fast path along texture y; same scheme as the x variant with pm.vdim[j]
+// being the y extent of input j.
+kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
+                                      texture2d_array<P, access::read> in1 [[texture(1)]],
+#if N >= 3
+                                      texture2d_array<P, access::read> in2 [[texture(2)]],
+#endif // N >= 3
+#if N >= 4
+                                      texture2d_array<P, access::read> in3 [[texture(3)]],
+#endif // N >= 4
+#if N >= 5
+                                      texture2d_array<P, access::read> in4 [[texture(4)]],
+#endif // N >= 5
+#if N >= 6
+                                      texture2d_array<P, access::read> in5 [[texture(5)]],
+#endif // N >= 6
+                                      texture2d_array<P, access::write> out [[texture(N)]],
+                                      constant ConcatParam & pm [[buffer(0)]],
+                                      uint3 gid [[thread_position_in_grid]]) {
+  int y = gid.y - pm.offset;
+  if (y < 0) return;
+  if (y < pm.vdim[0]) {
+    // Read at the offset-adjusted y like the other inputs (was gid.xy).
+    VECTOR(P, 4) r = in0.read(uint2(gid.x, y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+  y -= pm.vdim[0];
+  if (y < pm.vdim[1]) {
+    VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#if N >= 3
+  y -= pm.vdim[1];
+  if (y < pm.vdim[2]) {
+    VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 3
+#if N >= 4
+  y -= pm.vdim[2];
+  if (y < pm.vdim[3]) {
+    VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 4
+#if N >= 5
+  y -= pm.vdim[3];
+  if (y < pm.vdim[4]) {
+    VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 5
+#if N >= 6
+  y -= pm.vdim[4];
+  if (y < pm.vdim[5]) {
+    VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 6
+}
+#endif // V == VY
+
+#if V == VZ
+// Fast path along the array slice index z; pm.vdim[j] is the number of
+// slices contributed by input j.
+kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
+                                      texture2d_array<P, access::read> in1 [[texture(1)]],
+#if N >= 3
+                                      texture2d_array<P, access::read> in2 [[texture(2)]],
+#endif // N >= 3
+#if N >= 4
+                                      texture2d_array<P, access::read> in3 [[texture(3)]],
+#endif // N >= 4
+#if N >= 5
+                                      texture2d_array<P, access::read> in4 [[texture(4)]],
+#endif // N >= 5
+#if N >= 6
+                                      texture2d_array<P, access::read> in5 [[texture(5)]],
+#endif // N >= 6
+                                      texture2d_array<P, access::write> out [[texture(N)]],
+                                      constant ConcatParam & pm [[buffer(0)]],
+                                      uint3 gid [[thread_position_in_grid]]) {
+  int z = gid.z - pm.offset;
+  if (z < 0) return;
+  if (z < pm.vdim[0]) {
+    // Read the offset-adjusted slice like the other inputs (was gid.z).
+    VECTOR(P, 4) r = in0.read(gid.xy, z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+  z -= pm.vdim[0];
+  if (z < pm.vdim[1]) {
+    VECTOR(P, 4) r = in1.read(gid.xy, z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#if N >= 3
+  z -= pm.vdim[1];
+  if (z < pm.vdim[2]) {
+    VECTOR(P, 4) r = in2.read(gid.xy, z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 3
+#if N >= 4
+  z -= pm.vdim[2];
+  if (z < pm.vdim[3]) {
+    VECTOR(P, 4) r = in3.read(gid.xy, z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 4
+#if N >= 5
+  z -= pm.vdim[3];
+  if (z < pm.vdim[4]) {
+    VECTOR(P, 4) r = in4.read(gid.xy, z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 5
+#if N >= 6
+  z -= pm.vdim[4];
+  if (z < pm.vdim[5]) {
+    VECTOR(P, 4) r = in5.read(gid.xy, z);
+    out.write(r, gid.xy, gid.z);
+    return;
+  }
+#endif // N >= 6
+}
+#endif // V == VZ
+
+
+#undef VV
+#endif // #ifdef P
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..b7d17f2d25de544e4ce938c577e0d04f536da9af
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.metal
@@ -0,0 +1,171 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+#include "Common.metal"
+
+using namespace metal;
+
+// Parameters of one concat dispatch, bound at buffer(0).
+struct ConcatParam {
+  int32_t odim[4];   // output tensor dimensions
+  int32_t axis;      // index into the abcd coordinate of the concat axis
+  int32_t offset;    // start of the first input along the concat axis
+  int32_t trans[4];  // NOTE(review): not read by the concat kernel template
+  int32_t vdim[6];   // extent of each input along the concat axis
+};
+
+// Values of V selecting the kernel variant in ConcatKernel.inc.metal.
+#define VNORMAL 1
+#define VX 2
+#define VY 3
+#define VZ 4
+
+// >> fast mode
+// only support concat_{2,3,4}_{2,3,4,5,6}_y_{float,half}
+// only support concat_{3,4}_{2,3,4,5,6}_x_{float,half}
+// only support concat_{1,2,3,4}_{2,3,4,5,6}_z_{float,half}
+// >> normal mode (loop mode)
+// ssd-ar: (R=4, N=3, V=z), (R=3, N=2, V=y), (R=2, N=5, V=x), (R=3, N=5, V=x)
+// ssd: (R=2, N=6, V=y), (R=3, N=6, V=y)
+// genet: (R=4, N=2, V=normal)
+
+// Each group below instantiates the template twice (float and half) for one
+// (V, R, N) combination.
+
+// ssd-ar: (R=3, N=5, V=x)
+#define V VX
+  #define R 3
+  #define N 5
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+// ssd-ar: (R=2, N=5, V=x)
+#define V VX
+  #define R 2
+  #define N 5
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+
+// ssd-ar: (R=3, N=2, V=y)
+#define V VY
+  #define R 3
+  #define N 2
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+// ssd-ar: (R=4, N=3, V=z)
+#define V VZ
+  #define R 4
+  #define N 3
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+
+// ssd: (R=2, N=6, V=y)
+#define V VY
+  #define R 2
+  #define N 6
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+// ssd: (R=3, N=6, V=y)
+#define V VY
+  #define R 3
+  #define N 6
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+// genet: (R=4, N=2, V=normal)
+#define V VNORMAL
+  #define R 4
+  #define N 2
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+
+// (R=2, N=2, V=y)
+#define V VY
+  #define R 2
+  #define N 2
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+
+// (R=2, N=5, V=y)
+#define V VY
+  #define R 2
+  #define N 5
+  #define P float
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #define P half
+    #include "ConcatKernel.inc.metal"
+  #undef P
+  #undef N
+  #undef R
+#undef V
+
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..87b60a64fc48ab89af274e0b24897e0b411599e0
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal
@@ -0,0 +1,310 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+#include "Common.metal"
+using namespace metal;
+
+// Fused convolution + bias add + batch-norm scale/shift + ReLU kernels.
+// One thread produces one output texel, i.e. four output channels of slice
+// gid.z:  out = max((conv + biase) * new_scale + new_biase, 0).
+// Full-convolution weights are indexed as
+//   weights[gid.z * 4 * K * S + c * K * S + j * S + i]
+// with c the output component (0..3), j the filter tap (0..K-1), i the input
+// slice and S the input array size. Out-of-range taps sample as zero
+// (address::clamp_to_zero).
+
+// 1x1 convolution, half storage, float accumulation.
+kernel void conv_add_batch_norm_relu_1x1_half(
+    texture2d_array<half, access::sample> inTexture [[texture(0)]],
+    texture2d_array<half, access::write> outTexture [[texture(1)]],
+    constant MetalConvParam &param [[buffer(0)]],
+    const device half4 *weights [[buffer(1)]],
+    const device half4 *biase [[buffer(2)]],
+    const device half4 *new_scale [[buffer(3)]],
+    const device half4 *new_biase [[buffer(4)]],
+    uint3 gid [[thread_position_in_grid]]) {
+
+    // Threads outside the output extent have nothing to do.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    half4 input;
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
+        output.x += dot(input, weight_x);
+
+        half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
+        output.y += dot(input, weight_y);
+
+        half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
+        output.z += dot(input, weight_z);
+
+        half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
+        output.w += dot(input, weight_w);
+    }
+    output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
+    outTexture.write(half4(output), gid.xy, gid.z);
+}
+
+// 3x3 convolution, half storage, float accumulation. Taps are numbered
+// row-major: 0..2 top row, 3..5 middle row, 6..8 bottom row.
+kernel void conv_add_batch_norm_relu_3x3_half(
+    texture2d_array<half, access::sample> inTexture [[texture(0)]],
+    texture2d_array<half, access::write> outTexture [[texture(1)]],
+    constant MetalConvParam &param [[buffer(0)]],
+    const device half4 *weights [[buffer(1)]],
+    const device half4 *biase [[buffer(2)]],
+    const device half4 *new_scale [[buffer(3)]],
+    const device half4 *new_biase [[buffer(4)]],
+    uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    half4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+
+            half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+
+            half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+
+            half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
+    outTexture.write(half4(output), gid.xy, gid.z);
+}
+
+// Depthwise 3x3 convolution, half storage. Each output component is computed
+// from the same component of input slice gid.z; weights are scalars indexed
+// as weights[gid.z * 36 + c * 9 + j].
+kernel void depthwise_conv_add_batch_norm_relu_3x3_half(
+    texture2d_array<half, access::sample> inTexture [[texture(0)]],
+    texture2d_array<half, access::write> outTexture [[texture(1)]],
+    constant MetalConvParam &param [[buffer(0)]],
+    const device half *weights [[buffer(1)]],
+    const device half4 *biase [[buffer(2)]],
+    const device half4 *new_scale [[buffer(3)]],
+    const device half4 *new_biase [[buffer(4)]],
+    uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    uint output_slice = gid.z;
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint weithTo = gid.z * kernelHXW * 4;
+    float4 output = float4(0.0);
+    half4 inputs[9];
+    inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
+    inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
+    inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
+    inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
+    inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
+    inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
+    inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
+    inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
+    inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
+    for (int j = 0; j < 9; ++j) {
+        half4 input = inputs[j];
+        output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
+        output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
+        output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
+        output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
+    }
+    output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
+    outTexture.write(half4(output), gid.xy, gid.z);
+}
+
+
+
+/*---------------------------------------------*/
+
+
+
+// 1x1 convolution, float storage.
+kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    float4 input;
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
+        output.x += dot(input, weight_x);
+
+        float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
+        output.y += dot(input, weight_y);
+
+        float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
+        output.z += dot(input, weight_z);
+
+        float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
+        output.w += dot(input, weight_w);
+    }
+    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// 3x3 convolution, float storage. Taps are numbered row-major.
+kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    float4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+
+            float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+
+            float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+
+            float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Depthwise 3x3 convolution, float storage; scalar weights indexed as
+// weights[gid.z * 36 + c * 9 + j].
+kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                                   texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                                   constant MetalConvParam &param [[buffer(0)]],
+                                                   const device float *weights [[buffer(1)]],
+                                                   const device float4 *biase [[buffer(2)]],
+                                                   const device float4 *new_scale [[buffer(3)]],
+                                                   const device float4 *new_biase [[buffer(4)]],
+                                                   uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    uint output_slice = gid.z;
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint weithTo = gid.z * kernelHXW * 4;
+    float4 output = float4(0.0);
+    float4 inputs[9];
+    inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
+    inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
+    inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
+    inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
+    inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
+    inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
+    inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
+    inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
+    inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
+    for (int j = 0; j < 9; ++j) {
+        float4 input = inputs[j];
+        output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
+        output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
+        output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
+        output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
+    }
+    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddMetal.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddMetal.metal
new file mode 100644
index 0000000000000000000000000000000000000000..274e416576743a473ba8931bcd538e9c39415f3c
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddMetal.metal
@@ -0,0 +1,622 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "Common.metal" + +using namespace metal; + +#pragma mark - convAdd +kernel void conv_add_1x1(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = biase[gid.z]; + + float4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } +// output = output + biase[gid.z]; + outTexture.write(output, gid.xy, 
gid.z); +} + +kernel void conv_add_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 9; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = biase[gid.z]; + + ushort dilation_x = param.dilationX; + ushort dilation_y = param.dilationY; + + float4 input[9]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i); + + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); + + for (int j = 0; j < 9; ++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * 
input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } +// output = output + biase[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_add_5x1(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = biase[gid.z]; + + ushort dilation_y = param.dilationY; + float4 input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i); + + for (int j = 0; j < 5; 
++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } +// output = output + biase[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + + +kernel void conv_add_1x5(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = biase[gid.z]; + + ushort dilation_x = param.dilationX; + float4 input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, 
float2(posInInput.x + 2 * dilation_x, posInInput.y), i); + + for (int j = 0; j < 5; ++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } +// output = output + biase[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + + +kernel void depthwise_conv_add_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = biase[gid.z]; + float4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, 
posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + float4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } +// output = output + biase[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + + +#pragma mark - half + +kernel void conv_add_1x1_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + half4 output = biase[gid.z]; + + half4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + 
output.y += dot(input, weight_y); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } +// output = output + float4(biase[gid.z]); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_add_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + half4 output = biase[gid.z]; + + ushort dilation_x = param.dilationX; + ushort dilation_y = param.dilationY; + + half4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), 
i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); + for (int j = 0; j < 9; ++j) { + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(float4(input[j]), float4(weight_x)); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(float4(input[j]), float4(weight_y)); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(float4(input[j]), float4(weight_z)); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(float4(input[j]), float4(weight_w)); + } + } +// output = output + float4(biase[gid.z]); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void depthwise_conv_add_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + half4 output = biase[gid.z]; + half4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, 
float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + half4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } +// output = output + float4(biase[gid.z]); + outTexture.write(output, gid.xy, gid.z); +} + + +kernel void conv_add_5x1_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + half4 output = biase[gid.z]; + + ushort dilation_y = param.dilationY; + half4 input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = 
inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i); + + for (int j = 0; j < 5; ++j) { + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } +// output = output + float4(biase[gid.z]); + outTexture.write(output, gid.xy, gid.z); +} + + +kernel void conv_add_1x5_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + half4 output = biase[gid.z]; + + ushort dilation_x = 
param.dilationX; + half4 input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i); + + for (int j = 0; j < 5; ++j) { + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } +// output = output + float4(biase[gid.z]); + outTexture.write(output, gid.xy, gid.z); +} + + +kernel void test_conv_add_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + if (gid.x > 0 || gid.y > 0 || gid.z > 0) { return; } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 9; + + uint 
input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + ushort dilation_x = param.dilationX; + ushort dilation_y = param.dilationY; + + float4 input[9]; + + for (uint i = 0; i < input_arr_size; ++i) { + + input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i); + + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); + + for (int j = 0; j < 9; ++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + // output = output + biase[gid.z]; + outTexture.write(output, gid.xy, gid.z); +} + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPrelu.inc.metal 
b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPrelu.inc.metal new file mode 100644 index 0000000000000000000000000000000000000000..069daa20e875eb00c0d518e0463987248ca8dce5 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPrelu.inc.metal @@ -0,0 +1,447 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#ifdef P + +#include "Macro.metal" + + +#pragma mark - convAdd +kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + 
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + VECTOR(P, 4) output = biase[gid.z]; + + VECTOR(P, 4) input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample,float2(posInInput.x, posInInput.y), i); + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + +// output = output + float4(biase[gid.z]); + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 9; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + VECTOR(P, 4) output = biase[gid.z]; + + ushort dilation_x = param.dilationX; + ushort dilation_y = param.dilationY; + + VECTOR(P, 4) input[9]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[6] = inTexture.sample(sample, 
float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i); + + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); + + for (int j = 0; j < 9; ++j) { + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } +// output = output + float4(biase[gid.z]); + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + VECTOR(P, 4) output = biase[gid.z];; + + ushort dilation_y = param.dilationY; + VECTOR(P, 4) input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i); + + for (int j = 0; j < 5; ++j) { + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + VECTOR(P, 4) weight_y = 
weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + + +kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + VECTOR(P, 4) output = biase[gid.z]; + + ushort dilation_x = param.dilationX; + VECTOR(P, 4) input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i); + + for (int j = 0; j < 5; ++j) { + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + VECTOR(P, 4) weight_y = 
weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device P *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + VECTOR(P, 4) output = biase[gid.z]; + VECTOR(P, 4) inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, 
float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + VECTOR(P, 4) input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? 
output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +#endif + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..f03a1d5b625cf01f1f1bc5ac23bebf7dabd968d9 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal @@ -0,0 +1,65 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include +#include "Common.metal" +using namespace metal; + +#define P float + + #define PRELU_CHANNEL prelu_channel + #define PRELU_TYPE prelu_channel + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_CHANNEL + + #define PRELU_ELEMENT prelu_element + #define PRELU_TYPE prelu_element + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_ELEMENT + + #define PRELU_OTHER prelu_other + #define PRELU_TYPE prelu_other + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_OTHER + +#undef P + +#define P half + + #define PRELU_CHANNEL prelu_channel + #define PRELU_TYPE prelu_channel + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_CHANNEL + + #define PRELU_ELEMENT prelu_element + #define PRELU_TYPE prelu_element + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_ELEMENT + + #define PRELU_OTHER prelu_other + #define PRELU_TYPE prelu_other + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_OTHER + +#undef P + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvBNReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvBNReluKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..4b97b7829a1fba27704fe7b60a03b2672f4f5953 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvBNReluKernel.metal @@ -0,0 +1,297 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "Common.metal" + +using namespace metal; + +#pragma mark - conv bn relu +kernel void conv_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *new_scale [[buffer(2)]], + const device float4 *new_biase [[buffer(3)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 
*weights [[buffer(1)]], + const device float4 *new_scale [[buffer(2)]], + const device float4 *new_biase [[buffer(3)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); + input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); + for (int j = 0; j < 9; ++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = 
weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float *weights [[buffer(1)]], + const device float4 *new_scale [[buffer(2)]], + const device float4 *new_biase [[buffer(3)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = float4(0.0); + float4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; 
++j) { + float4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + +#pragma mark - half +kernel void conv_batch_norm_relu_1x1_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *new_scale [[buffer(2)]], + const device half4 *new_biase [[buffer(3)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + half4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(float4(input), float4(weight_x)); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(float4(input), float4(weight_y)); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(float4(input), float4(weight_z)); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(float4(input), float4(weight_w)); + } + output = fmax(output * 
float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); + outTexture.write(half4(output), gid.xy, gid.z); +} + +kernel void conv_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *new_scale [[buffer(2)]], + const device half4 *new_biase [[buffer(3)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + half4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); + input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); + for (int j = 0; j < 9; ++j) { + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(float4(input[j]), 
float4(weight_x)); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(float4(input[j]), float4(weight_y)); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(float4(input[j]), float4(weight_z)); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(float4(input[j]), float4(weight_w)); + } + } + output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); + outTexture.write(half4(output), gid.xy, gid.z); +} + +kernel void depthwise_conv_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half *weights [[buffer(1)]], + const device half4 *new_scale [[buffer(2)]], + const device half4 *new_biase [[buffer(3)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = float4(0.0); + half4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = 
inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + half4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0); + outTexture.write(half4(output), gid.xy, gid.z); +} + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..c07515c13da54c7f8bf698f976e47f7cda6de32b --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal @@ -0,0 +1,280 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include +#include "Common.metal" +using namespace metal; + +// conv +#pragma mark -- conv +kernel void conv_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); + input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); + for (int j = 0; j < 9; ++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], 
weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + outTexture.write(output, gid.xy, gid.z); +} + +kernel void depthwise_conv_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float *weights [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = float4(0.0); + float4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + 
float4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_1x1(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + outTexture.write(output, gid.xy, gid.z); +} + + +kernel void conv_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { 
+ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + half4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); + input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); + for (int j = 0; j < 9; ++j) { + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(float4(input[j]), float4(weight_x)); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(float4(input[j]), float4(weight_y)); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(float4(input[j]), float4(weight_z)); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(float4(input[j]), 
float4(weight_w)); + } + } + outTexture.write(half4(output), gid.xy, gid.z); +} + +kernel void depthwise_conv_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half *weights [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = float4(0.0); + half4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + half4 input = inputs[j]; + output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]); + output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]); + output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]); + 
output.w += float(input.w) * float(weights[weithTo + 3 * kernelHXW + j]); + } + outTexture.write(half4(output), gid.xy, gid.z); +} + +kernel void conv_1x1_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + half4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(float4(input), float4(weight_x)); + + half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(float4(input), float4(weight_y)); + + half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(float4(input), float4(weight_z)); + + half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(float4(input), float4(weight_w)); + } + outTexture.write(half4(output), gid.xy, gid.z); +} + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..baf3f31157a472412bb08ccb3c803f5ec9e25d9c --- /dev/null +++ 
b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal
@@ -0,0 +1,174 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+// Host-supplied description of a transposed convolution. The two active
+// kernels below hard-code a 2x2 kernel with stride 2 and do not read any of
+// these fields; only the disabled generic kernel at the end of the file does.
+struct MetalConvTransposeParam{
+    ushort kernelW;
+    ushort kernelH;
+
+    ushort strideX;
+    ushort strideY;
+
+    ushort paddingX;
+    ushort paddingY;
+
+    ushort dilationX;
+    ushort dilationY;
+};
+
+// 2x2 transposed convolution with stride 2, float textures.
+//
+// Output texel (x, y) depends on exactly one input texel, (x / 2, y / 2), and
+// one kernel tap, (y % 2) * 2 + (x % 2).
+//
+// weights: float4 entries addressed as
+//          gid.z * 16 * S + component * 4 * S + tap * S + input_slice,
+//          where S is the number of input slices.
+kernel void conv_transpose2x2_stride2(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                      texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                      constant MetalConvTransposeParam &param [[buffer(0)]],
+                                      const device float4 *weights [[buffer(1)]],
+                                      uint3 gid [[thread_position_in_grid]]) {
+    // Threads launched past the output extent have nothing to do.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    int input_array_size = inTexture.get_array_size();
+    int kernel_index_x = gid.x % 2;
+    int kernel_index_y = gid.y % 2;
+    int kernel_index = kernel_index_y * 2 + kernel_index_x;
+    // Offset of (output slice, tap); the component stride is added per slice below.
+    int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
+    int input_x = gid.x / 2;
+    int input_y = gid.y / 2;
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 output = float4(0.0);
+    for (int i = 0; i < input_array_size; ++i) {
+
+        float4 input = inTexture.sample(sample, float2(input_x, input_y), i);
+
+        float4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
+        float4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
+        float4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
+        float4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
+
+        output.x += dot(input, kernel_slice0);
+
+        output.y += dot(input, kernel_slice1);
+
+        output.z += dot(input, kernel_slice2);
+
+        output.w += dot(input, kernel_slice3);
+    }
+
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Half-precision variant of conv_transpose2x2_stride2: same addressing, with
+// half textures and half4 weights. The sum is accumulated in float and
+// converted to half when written.
+kernel void conv_transpose2x2_stride2_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                                           texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                           constant MetalConvTransposeParam &param [[buffer(0)]],
+                                           const device half4 *weights [[buffer(1)]],
+                                           uint3 gid [[thread_position_in_grid]]) {
+    // Threads launched past the output extent have nothing to do.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    int input_array_size = inTexture.get_array_size();
+    int kernel_index_x = gid.x % 2;
+    int kernel_index_y = gid.y % 2;
+    int kernel_index = kernel_index_y * 2 + kernel_index_x;
+    // Offset of (output slice, tap); the component stride is added per slice below.
+    int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
+    int input_x = gid.x / 2;
+    int input_y = gid.y / 2;
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 output = float4(0.0);
+    for (int i = 0; i < input_array_size; ++i) {
+
+        half4 input = inTexture.sample(sample, float2(input_x, input_y), i);
+
+        half4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
+        half4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
+        half4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
+        half4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
+
+        output.x += dot(float4(input), float4(kernel_slice0));
+
+        output.y += dot(float4(input), float4(kernel_slice1));
+
+        output.z += dot(float4(input), float4(kernel_slice2));
+
+        output.w += dot(float4(input), float4(kernel_slice3));
+    }
+
+    outTexture.write(half4(output), gid.xy, gid.z);
+}
+
+// Disabled: generic transposed convolution driven by MetalConvTransposeParam
+// (kernel size, stride, padding, dilation). Kept commented out for reference.
+//kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+//                           texture2d_array<float, access::write> outTexture [[texture(1)]],
+//                           constant MetalConvTransposeParam &param [[buffer(0)]],
+//                           const device float4 *weights [[buffer(1)]],
+//                           uint3 gid [[thread_position_in_grid]]){
+//    if (gid.x >= outTexture.get_width() ||
+//        gid.y >= outTexture.get_height() ||
+//        gid.z >= outTexture.get_array_size()) {
+//        return;
+//    }
+//
+//    int input_array_size = inTexture.get_array_size();
+//
+//    uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
+//
+//    uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);
+//
+//    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+//
+//    float4 output;
+//
+//    for (int w = 0; w < param.kernelW; ++w) {
+//        int top = gid.x - w * param.dilationX + param.paddingX;
+//        int input_x = top / param.strideX;
+//        if (top < 0 || input_x >= int(inTexture.get_width())) {
+//            continue;
+//        }
+//
+//        for (int h = 0; h < param.kernelH; ++h) {
+//            int top_y = gid.y - h * param.dilationY + param.paddingY;
+//            int input_y = top_y / param.strideY;
+//            if (top_y < 0 || input_y >= int(inTexture.get_height())) {
+//                continue;
+//            }
+//
+//            uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();
+//
+//            for (int slice = 0; slice < input_array_size; ++slice) {
+//
+//                float4 input;
+//                float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
+//                float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
+//
+//                float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
+//
+//                float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];
+//
+//                input = inTexture.sample(sample, float2(input_x, input_y), slice);
+//                output.x += dot(input, kernel_slice);
+//                output.y += dot(input, kernel_slice1);
+//                output.z += dot(input, kernel_slice2);
+//                output.w += dot(input, kernel_slice3);
+//            }
+//        }
+//    }
+//
+//    outTexture.write(output, gid.xy, gid.z);
+//}
+//
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal
new file mode 100644
index 0000000000000000000000000000000000000000..b152df828106acd96171a89f4f636f308e0e9e39
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal
@@ -0,0 +1,100 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
*/
+
+#include <metal_stdlib>
+#include "Common.metal"
+
+using namespace metal;
+
+// Host-supplied parameters for the element-wise add kernels.
+//   fast   : 1 selects the direct path (X and Y read at the same texel).
+//   axis   : first index of X's logical 4-D coordinate that Y is aligned to.
+//   ylen   : number of consecutive indices, starting at axis, copied into Y's
+//            coordinate.
+//   xdim / ydim     : only element [3] is read here; it is passed to
+//                     xyzn2abcd / abcd2xyzn.
+//   xtrans / ytrans : passed to invtrans / trans.
+// NOTE(review): xyzn2abcd, abcd2xyzn, trans and invtrans come from
+// Common.metal; presumably they map between texture coordinates and logical
+// tensor indices - confirm there.
+struct ElementwiseAddParam {
+    int32_t fast;
+    int32_t axis;
+    int32_t ylen;
+    int32_t xdim[4];
+    int32_t xtrans[4];
+    int32_t ydim[4];
+    int32_t ytrans[4];
+};
+
+// out = X + Y on float textures, one texel (4 components) per thread.
+//
+// Fast path (pm.fast == 1): Y is read at the same texel as X.
+// Slow path: for each of the 4 components, X's position is converted to a
+// logical index, indices [axis, axis + ylen) are copied into Y's index at
+// offset yshift = 4 - ylen - axis (all other Y indices stay 0), and the
+// matching Y texel/component is fetched.
+kernel void elementwise_add(texture2d_array<float, access::read> inputX [[texture(0)]],
+                            texture2d_array<float, access::read> inputY [[texture(1)]],
+                            texture2d_array<float, access::write> outTexture [[texture(2)]],
+                            constant ElementwiseAddParam &pm [[buffer(0)]],
+                            uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    float4 rx, ry;
+
+    if (pm.fast == 1) {
+        rx = inputX.read(gid.xy, gid.z);
+        ry = inputY.read(gid.xy, gid.z);
+    } else {
+        rx = inputX.read(gid.xy, gid.z);
+        int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
+        int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
+        // Local copies of the constant-address-space arrays for the helpers.
+        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
+        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
+        int32_t yshift = 4 - pm.ylen - pm.axis;
+        for (int n = 0; n < 4; n++) {
+            x_xyzn[3] = n;
+            xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
+            invtrans(xtrans, x_abcd, t_abcd);
+            for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
+                y_abcd[yshift+k] = t_abcd[k];
+            }
+            trans(ytrans, y_abcd, t_abcd);
+            abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
+            ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
+        }
+    }
+    float4 r = rx + ry;
+    outTexture.write(r, gid.xy, gid.z);
+}
+
+// Half-precision variant of elementwise_add; the addressing is identical.
+kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[texture(0)]],
+                                 texture2d_array<half, access::read> inputY [[texture(1)]],
+                                 texture2d_array<half, access::write> outTexture [[texture(2)]],
+                                 constant ElementwiseAddParam &pm [[buffer(0)]],
+                                 uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    half4 rx, ry;
+
+    if (pm.fast == 1) {
+        rx = inputX.read(gid.xy, gid.z);
+        ry = inputY.read(gid.xy, gid.z);
+    } else {
+        rx = inputX.read(gid.xy, gid.z);
+        int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
+        int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
+        // Local copies of the constant-address-space arrays for the helpers.
+        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
+        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
+        int32_t yshift = 4 - pm.ylen - pm.axis;
+        for (int n = 0; n < 4; n++) {
+            x_xyzn[3] = n;
+            xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
+            invtrans(xtrans, x_abcd, t_abcd);
+            for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
+                y_abcd[yshift+k] = t_abcd[k];
+            }
+            trans(ytrans, y_abcd, t_abcd);
+            abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
+            ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
+        }
+    }
+    half4 r = rx + ry;
+    outTexture.write(r, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.inc.metal
new file mode 100644
index 0000000000000000000000000000000000000000..b1d68d680962c53778d624ab15bfcfeb1d1a3142
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.inc.metal
@@ -0,0 +1,91 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
*/
+
+// Template body for the fused element-wise add + PReLU kernels.
+//
+// The including file must define, before each #include of this file:
+//   P          : the scalar type (float or half).
+//   PRELU_TYPE : the token spliced into the kernel name.
+//   exactly one of PRELU_CHANNEL / PRELU_ELEMENT / PRELU_OTHER, which selects
+//   the type of the alpha buffer and how alpha is looked up.
+// ElementwiseAddParam must already be declared by the including file.
+#ifdef P
+
+#include <metal_stdlib>
+#include "Macro.metal"
+
+using namespace metal;
+
+// Generated name: elementwise_add_<PRELU_TYPE>_<P>.
+// Computes s = X + Y exactly like elementwise_add, then applies PReLU per
+// component: s > 0 ? s : alpha * s.
+kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
+                                                   texture2d_array<P, access::read> inputY [[texture(1)]],
+                                                   texture2d_array<P, access::write> outTexture [[texture(2)]],
+                                                   constant ElementwiseAddParam &pm [[buffer(0)]],
+#ifdef PRELU_CHANNEL
+                                                   const device VECTOR(P, 4) *alpha [[buffer(1)]],
+#endif
+#ifdef PRELU_ELEMENT
+                                                   const device VECTOR(P, 4) *alpha [[buffer(1)]],
+#endif
+#ifdef PRELU_OTHER
+                                                   const device P *alpha [[buffer(1)]],
+#endif
+                                                   uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    VECTOR(P, 4) rx, ry;
+
+    if (pm.fast == 1) {
+        // Fast path: Y is read at the same texel as X.
+        rx = inputX.read(gid.xy, gid.z);
+        ry = inputY.read(gid.xy, gid.z);
+    } else {
+        // Slow path: locate the Y texel/component for each X component.
+        rx = inputX.read(gid.xy, gid.z);
+        int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
+        int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
+        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
+        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
+        int32_t yshift = 4 - pm.ylen - pm.axis;
+        for (int n = 0; n < 4; n++) {
+            x_xyzn[3] = n;
+            xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
+            invtrans(xtrans, x_abcd, t_abcd);
+            for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
+                y_abcd[yshift+k] = t_abcd[k];
+            }
+            trans(ytrans, y_abcd, t_abcd);
+            abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
+            ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
+        }
+    }
+    VECTOR(P, 4) output = rx + ry;
+
+#ifdef PRELU_CHANNEL
+    // One alpha vector per output slice.
+    VECTOR(P, 4) alpha_value = alpha[gid.z];
+    output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
+    output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
+    output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
+    output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
+#endif
+#ifdef PRELU_ELEMENT
+    // One alpha vector per texel: (y * width + x) * slices + slice.
+    int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
+    VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
+    output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
+    output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
+    output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
+    output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
+#endif
+#ifdef PRELU_OTHER
+    // A single scalar alpha shared by every element.
+    P alpha_value = alpha[0];
+    output.x = output.x > 0 ? output.x : (alpha_value * output.x);
+    output.y = output.y > 0 ? output.y : (alpha_value * output.y);
+    output.z = output.z > 0 ? output.z : (alpha_value * output.z);
+    output.w = output.w > 0 ? output.w : (alpha_value * output.w);
+#endif
+
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+#endif
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..8fd1a9fdab8c86fbc52f6dab9c448b7b0f27d403
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ElementwiseAddPreluKernel.metal
@@ -0,0 +1,75 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
*/
+
+#include <metal_stdlib>
+#include "Common.metal"
+using namespace metal;
+
+// Parameters shared by every kernel generated below; see
+// ElementwiseAddPreluKernel.inc.metal for how each field is read.
+struct ElementwiseAddParam {
+    int32_t fast;
+    int32_t axis;
+    int32_t ylen;
+    int32_t xdim[4];
+    int32_t xtrans[4];
+    int32_t ydim[4];
+    int32_t ytrans[4];
+};
+
+// Each #include below stamps out one kernel named
+// elementwise_add_<PRELU_TYPE>_<P>.
+// NOTE(review): PRELU_TYPE is `channel` for the channel variants but
+// `prelu_element` / `prelu_other` for the others, so the generated names are
+// elementwise_add_channel_<P>, elementwise_add_prelu_element_<P> and
+// elementwise_add_prelu_other_<P>. Confirm the host-side function lookups
+// use exactly these names before normalising them.
+
+// ---- float kernels ----
+#define P float
+
+#define PRELU_CHANNEL prelu_channel
+#define PRELU_TYPE channel
+#include "ElementwiseAddPreluKernel.inc.metal"
+#undef PRELU_TYPE
+#undef PRELU_CHANNEL
+
+#define PRELU_ELEMENT element
+#define PRELU_TYPE prelu_element
+#include "ElementwiseAddPreluKernel.inc.metal"
+#undef PRELU_TYPE
+#undef PRELU_ELEMENT
+
+#define PRELU_OTHER other
+#define PRELU_TYPE prelu_other
+#include "ElementwiseAddPreluKernel.inc.metal"
+#undef PRELU_TYPE
+#undef PRELU_OTHER
+
+#undef P
+
+// ---- half kernels ----
+#define P half
+
+#define PRELU_CHANNEL channel
+#define PRELU_TYPE channel
+#include "ElementwiseAddPreluKernel.inc.metal"
+#undef PRELU_TYPE
+#undef PRELU_CHANNEL
+
+#define PRELU_ELEMENT element
+#define PRELU_TYPE prelu_element
+#include "ElementwiseAddPreluKernel.inc.metal"
+#undef PRELU_TYPE
+#undef PRELU_ELEMENT
+
+#define PRELU_OTHER other
+#define PRELU_TYPE prelu_other
+#include "ElementwiseAddPreluKernel.inc.metal"
+#undef PRELU_TYPE
+#undef PRELU_OTHER
+
+#undef P
+
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..b7d7028d46356e0dae21b352161de31b0820ff1a
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal
@@ -0,0 +1,71 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+// Copies a float texture array into a linear float buffer, one texel per thread.
+//
+// Component c of texel (x, y) in slice z is stored at
+//     z * (4 * W * H) + c * (W * H) + y * W + x
+// where W and H are the texture width and height.
+// Only components 0 (x) and 1 (y) are written; the z and w stores are
+// commented out below.
+// NOTE(review): presumably only two channels of the fetched output are
+// needed - confirm before re-enabling the z/w stores.
+kernel void fetch(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                  device float *output [[buffer(0)]],
+                  uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height() ||
+        gid.z >= inTexture.get_array_size()) {
+        return;
+    }
+
+    int input_width = inTexture.get_width();
+    int input_height = inTexture.get_height();
+    const float4 input = inTexture.read(gid.xy, gid.z);
+    // Number of floats reserved per slice (4 planes of W * H).
+    int output_to = 4 * input_width * input_height;
+    output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
+    output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
+//    output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
+//    output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
+}
+
+
+// Half-texture variant of fetch: same layout, values widened to float on store.
+kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                       device float * output [[buffer(0)]],
+                       uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height() ||
+        gid.z >= inTexture.get_array_size()) {
+        return;
+    }
+
+    int input_width = inTexture.get_width();
+    int input_height = inTexture.get_height();
+    const half4 input = inTexture.read(gid.xy, gid.z);
+    // Number of floats reserved per slice (4 planes of W * H).
+    int output_to = 4 * input_width * input_height;
+    output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
+    output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
+//    output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
+//    output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
+
+}
+
+// Intentionally empty kernel with the same argument list as fetch.
+// NOTE(review): the body never touches the texture, so the template arguments
+// below are assumed to mirror fetch - confirm against the host-side binding.
+kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                              device float *output [[buffer(0)]],
+                              uint3 gid [[thread_position_in_grid]]) {
+
+}
+
+// Intentionally empty kernel with the same argument list as fetch_half.
+// NOTE(review): template arguments assumed to mirror fetch_half - confirm.
+kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                                   device float *output [[buffer(0)]],
+                                   uint3 gid [[thread_position_in_grid]]) {
+}
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal
new file mode 100644
index 0000000000000000000000000000000000000000..368509f001aca6361b81b9b7839cf24b2efc5c12
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal
@@ -0,0 +1,69 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
*/
+
+#include <metal_stdlib>
+#include "Common.metal"
+using namespace metal;
+
+// Placeholder kernel; does nothing.
+// NOTE(review): the body never touches its textures, so the template
+// arguments below are an assumption - confirm against the host-side binding.
+kernel void place_holder(texture2d<half, access::read> inTexture [[texture(0)]],
+                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+                         uint3 gid [[thread_position_in_grid]]) {
+}
+
+// Parameters for resize. Only strideX / strideY are read by the kernel.
+struct OutputDim {
+    ushort width;
+    ushort height;
+    ushort strideX;
+    ushort strideY;
+};
+
+// Strided copy from a 2-D texture into a texture array: output texel
+// (x, y) of every slice receives input texel (x * strideX, y * strideY).
+kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
+                   texture2d_array<half, access::write> outTexture [[texture(1)]],
+                   constant OutputDim &params [[buffer(0)]],
+                   uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+
+    const uint2 pos = gid.xy * uint2(params.strideX, params.strideY);
+    const half4 input = inTexture.read(pos);
+    outTexture.write(input, gid.xy, gid.z);
+}
+
+
+// Copies a float 2-D texture into slice 0 of a float texture array.
+// Only x and y are bounds-checked (against the input size); gid.z is not
+// used for addressing.
+kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
+                                  texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                  uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height()){
+        return;
+    }
+    const float4 input = inTexture.read(gid.xy);
+    outTexture.write(input, gid.xy, 0);
+}
+
+// Half-precision variant of texture2d_to_2d_array.
+kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
+                                       texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                       uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height()){
+        return;
+    }
+    const half4 input = inTexture.read(gid.xy);
+    outTexture.write(input, gid.xy, 0);
+}
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal
new file mode 100644
index 0000000000000000000000000000000000000000..950d7d5f0555b841da57554ff61f2f5cdbcae7aa
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal
@@ -0,0 +1,29 @@
+/* Copyright (c) 2018
PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+
+// Token-pasting primitives. CONCAT2 joins two tokens directly; the
+// underscore variants join their arguments with '_' between them.
+// These paste their arguments as written, without expanding them first.
+#define CONCAT2(a, b) a ## b
+#define CONCAT2_(a, b) a ## _ ## b
+#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
+#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
+#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
+
+// Wrappers that add one level of indirection so that macro arguments
+// (for example P or PRELU_TYPE) are expanded before being pasted.
+//   FUNC(f, r, n, v, p) -> f_r_n_v_p
+//   VECTOR(p, n)        -> pn, e.g. VECTOR(half, 4) -> half4
+#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
+#define VECTOR(p, n) CONCAT2(p, n)
+
+//   FUNC3_(a, b, c)     -> a_b_c
+#define FUNC3_(a, b, c) CONCAT3_(a, b, c)
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..44c57440e1ec138717ad1bc569fd772e0d7ede1a
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal
@@ -0,0 +1,80 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+// Copies component x of every texel into a linear float buffer at
+// index y * width + x.
+// NOTE(review): the destination index does not depend on gid.z, so threads
+// from different slices write the same element - presumably the input has a
+// single slice; confirm with the caller.
+kernel void nms_fetch_result(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                             device float *output [[buffer(0)]],
+                             uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height() ||
+        gid.z >= inTexture.get_array_size()) {
+        return;
+    }
+
+    int input_width = inTexture.get_width();
+    const float4 input = inTexture.read(gid.xy, gid.z);
+    output[gid.y * input_width + gid.x] = input.x;
+
+}
+
+
+// Half-texture variant of nms_fetch_result; the value is widened to float on
+// store. The same gid.z caveat applies.
+kernel void nms_fetch_result_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                                  device float *output [[buffer(0)]],
+                                  uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height() ||
+        gid.z >= inTexture.get_array_size()) {
+        return;
+    }
+
+    int input_width = inTexture.get_width();
+    const half4 input = inTexture.read(gid.xy, gid.z);
+    output[gid.y * input_width + gid.x] = input.x;
+}
+
+// Copies every texel (all 4 components) into a linear float4 buffer at
+// index y * width + x. The destination index does not depend on gid.z.
+kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                           device float4 *output [[buffer(0)]],
+                           uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height() ||
+        gid.z >= inTexture.get_array_size()) {
+        return;
+    }
+
+    int input_width = inTexture.get_width();
+//    int input_height = inTexture.get_height();
+    const float4 input = inTexture.read(gid.xy, gid.z);
+    output[gid.y * input_width + gid.x] = input;
+}
+
+// Half-texture variant of nms_fetch_bbox; the texel is widened to float4 on
+// store.
+kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                                device float4 *output [[buffer(0)]],
+                                uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= inTexture.get_width() ||
+        gid.y >= inTexture.get_height() ||
+        gid.z >= inTexture.get_array_size()) {
+        return;
+    }
+
+    int input_width = inTexture.get_width();
+//    int input_height = inTexture.get_height();
+    const half4 input = inTexture.read(gid.xy, gid.z);
+    output[gid.y * input_width + gid.x] = float4(input);
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PoolKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PoolKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..1f2f7240db2ba716090001ed539bddb87dff5117
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PoolKernel.metal
@@ -0,0 +1,93 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
*/
+
+#include <metal_stdlib>
+#include "Common.metal"
+using namespace metal;
+
+// Host-supplied pooling parameters.
+//   ksizeX / ksizeY     : window extent along x / y.
+//   strideX / strideY   : step between windows along x / y.
+//   paddingX / paddingY : offset subtracted from the window origin.
+//   poolType            : 0 selects max pooling, 1 selects average pooling;
+//                         any other value makes the kernels write zeros.
+struct PoolParam {
+    int ksizeX;
+    int ksizeY;
+    int strideX;
+    int strideY;
+    int paddingX;
+    int paddingY;
+    int poolType;
+};
+
+// 2-D pooling over float textures, one output texel per thread.
+//
+// The window starts at gid.xy * stride - padding and is clipped to the input
+// texture. Max pooling takes the component-wise maximum over the clipped
+// window. Average pooling sums the clipped window but always divides by
+// ksizeX * ksizeY, i.e. padded positions count as zeros.
+kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                 texture2d_array<float, access::write> outTexture [[texture(1)]],
+                 constant PoolParam &pm [[buffer(0)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    int xmin = gid.x * pm.strideX - pm.paddingX;
+    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
+    xmin = max(xmin, 0);
+    // The vertical window must use the Y stride, padding and kernel size;
+    // reusing the X values is only correct for square configurations.
+    int ymin = gid.y * pm.strideY - pm.paddingY;
+    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
+    ymin = max(ymin, 0);
+
+    float4 r = 0;
+    if (pm.poolType == 0) {
+        // Seed with the first texel so negative inputs are handled correctly.
+        r = inTexture.read(uint2(xmin, ymin), gid.z);
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
+            }
+        }
+    } else if (pm.poolType == 1) {
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r += inTexture.read(uint2(x, y), gid.z);
+            }
+        }
+        r /= pm.ksizeX * pm.ksizeY;
+    }
+    outTexture.write(r, gid.xy, gid.z);
+}
+
+// Half-precision variant of pool; the windowing and pooling rules are the same.
+kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                      texture2d_array<half, access::write> outTexture [[texture(1)]],
+                      constant PoolParam &pm [[buffer(0)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    int xmin = gid.x * pm.strideX - pm.paddingX;
+    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
+    xmin = max(xmin, 0);
+    // The vertical window must use the Y stride, padding and kernel size;
+    // reusing the X values is only correct for square configurations.
+    int ymin = gid.y * pm.strideY - pm.paddingY;
+    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
+    ymin = max(ymin, 0);
+
+    half4 r = 0;
+    if (pm.poolType == 0) {
+        // Seed with the first texel so negative inputs are handled correctly.
+        r = inTexture.read(uint2(xmin, ymin), gid.z);
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
+            }
+        }
+    } else if (pm.poolType == 1) {
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r += inTexture.read(uint2(x, y), gid.z);
+            }
+        }
+        r /= pm.ksizeX * pm.ksizeY;
+    }
+    outTexture.write(r, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..597804137743dd253d05d91a5008f558dcaf42e7
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal
@@ -0,0 +1,151 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+// PReLU with one alpha vector per array slice (alpha[gid.z]), float textures.
+// Per component: out = in > 0 ? in : alpha * in.
+kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                          texture2d_array<float, access::write> outTexture [[texture(1)]],
+                          const device float4 *alpha [[buffer(0)]],
+                          uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+    float4 alpha_value = alpha[gid.z];
+    float4 output;
+    output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
+    output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
+    output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
+    output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// PReLU with one alpha vector per texel, float textures.
+// alpha is addressed as (y * width + x) * slices + slice.
+kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                          texture2d_array<float, access::write> outTexture [[texture(1)]],
+                          const device float4 *alpha [[buffer(0)]],
+                          uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+
+    int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
+    float4 alpha_value = alpha[alpha_to + gid.z];
+
+    float4 output;
+    output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
+    output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
+    output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
+    output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// PReLU with a single scalar alpha (alpha[0]) shared by every element,
+// float textures.
+kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                        texture2d_array<float, access::write> outTexture [[texture(1)]],
+                        const device float *alpha [[buffer(0)]],
+                        uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+    float alpha_value = alpha[0];
+    float4 output;
+    output.x = input.x > 0 ? input.x : (alpha_value * input.x);
+    output.y = input.y > 0 ? input.y : (alpha_value * input.y);
+    output.z = input.z > 0 ? input.z : (alpha_value * input.z);
+    output.w = input.w > 0 ? input.w : (alpha_value * input.w);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+
+// Half-precision variant of prelu_channel.
+kernel void prelu_channel_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                               texture2d_array<half, access::write> outTexture [[texture(1)]],
+                               const device half4 *alpha [[buffer(0)]],
+                               uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+    half4 alpha_value = alpha[gid.z];
+    half4 output;
+    output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
+    output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
+    output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
+    output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Half-precision variant of prelu_element.
+kernel void prelu_element_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                               texture2d_array<half, access::write> outTexture [[texture(1)]],
+                               const device half4 *alpha [[buffer(0)]],
+                               uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+
+    int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
+    half4 alpha_value = alpha[alpha_to + gid.z];
+
+    half4 output;
+    output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
+    output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
+    output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
+    output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Half-precision variant of prelu_other.
+kernel void prelu_other_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                             texture2d_array<half, access::write> outTexture [[texture(1)]],
+                             const device half *alpha [[buffer(0)]],
+                             uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+    half alpha_value = alpha[0];
+    half4 output;
+    output.x = input.x > 0 ? input.x : (alpha_value * input.x);
+    output.y = input.y > 0 ? input.y : (alpha_value * input.y);
+    output.z = input.z > 0 ? input.z : (alpha_value * input.z);
+    output.w = input.w > 0 ? input.w : (alpha_value * input.w);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..7630febf77210bb364f0191e8b10a5a6923d6c95
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal
@@ -0,0 +1,367 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
*/
+
+#include
+using namespace metal;
+
+struct PriorBoxMetalParam {
+  float offset;       // added to the texel index before scaling by the step
+  float stepWidth;
+  float stepHeight;
+  float minSize;
+  float maxSize;
+  float imageWidth;   // box x coordinates are divided by this
+  float imageHeight;  // box y coordinates are divided by this
+
+  bool clip;          // clamp box coordinates to [0, 1] when set
+
+  uint numPriors;     // variances are written for slices below this index
+  uint aspecRatiosSize;
+  uint minSizeSize;
+  uint maxSizeSize;   // > 0 enables the sqrt(minSize * maxSize) box
+};
+// Writes one prior box (x_min, y_min, x_max, y_max) per texel and array slice, plus its variance.
+kernel void prior_box(texture2d_array inTexture [[texture(0)]],
+                      texture2d_array outBoxTexture [[texture(1)]],
+                      texture2d_array varianceTexture [[texture(2)]],
+                      const device float *aspect_ratios [[buffer(0)]],
+                      constant PriorBoxMetalParam &param [[buffer(1)]],
+                      const device float4 *variances [[buffer(2)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outBoxTexture.get_width() ||
+      gid.y >= outBoxTexture.get_height() ||
+      gid.z >= outBoxTexture.get_array_size()) return;
+  // Box centre: the texel index shifted by param.offset and scaled by the step.
+  float center_x = (gid.x + param.offset) * param.stepWidth;
+  float center_y = (gid.y + param.offset) * param.stepHeight;
+
+  float box_width, box_height;
+
+  if (gid.z < param.aspecRatiosSize) {
+    float ar = aspect_ratios[gid.z];
+    box_width = param.minSize * sqrt(ar) / 2;
+    box_height = param.minSize / sqrt(ar) / 2;
+    float4 box;
+    box.x = (center_x - box_width) / param.imageWidth;
+    box.y = (center_y - box_height) / param.imageHeight;
+    box.z = (center_x + box_width) / param.imageWidth;
+    box.w = (center_y + box_height) / param.imageHeight;
+
+    float4 res;
+    if (param.clip) {
+      res = fmin(fmax(box, 0.0), 1.0);
+    } else {
+      res = box;
+    }
+
+    outBoxTexture.write(res, gid.xy, gid.z);
+  } else if (gid.z >= param.aspecRatiosSize) {
+    if (param.maxSizeSize > 0) {
+      box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
+      float4 max_box;
+      max_box.x = (center_x - box_width) / param.imageWidth;
+      max_box.y = (center_y - box_height) / param.imageHeight;
+      max_box.z = (center_x + box_width) / param.imageWidth;
+      max_box.w = (center_y + box_height) / param.imageHeight;
+
+      float4 res;
+      if (param.clip) {
+        res = min(max(max_box, 0.0), 1.0);
+      } else {
+        res = max_box;
+      }
+      outBoxTexture.write(res, gid.xy, gid.z);  // res, not max_box, so param.clip is honoured
+    }
+  }
+
+  float4 variance = variances[0];
+  if (gid.z < param.numPriors) {
+    float4 variances_output;
+    variances_output.x = variance.x;
+    variances_output.y = variance.y;
+    variances_output.z = variance.z;
+    variances_output.w = variance.w;
+    varianceTexture.write(variances_output, gid.xy, gid.z);
+  }
+}
+
+// prior_box with half aspect ratios; boxes and variances are converted to half4 when written.
+kernel void prior_box_half(texture2d_array inTexture [[texture(0)]],
+                           texture2d_array outBoxTexture [[texture(1)]],
+                           texture2d_array varianceTexture [[texture(2)]],
+                           const device half *aspect_ratios [[buffer(0)]],
+                           constant PriorBoxMetalParam &param [[buffer(1)]],
+                           const device float4 *variances [[buffer(2)]],
+                           uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outBoxTexture.get_width() ||
+      gid.y >= outBoxTexture.get_height() ||
+      gid.z >= outBoxTexture.get_array_size()) return;
+
+  float center_x = (gid.x + param.offset) * param.stepWidth;
+  float center_y = (gid.y + param.offset) * param.stepHeight;
+
+  float box_width, box_height;
+
+  if (gid.z < param.aspecRatiosSize) {
+    half ar = aspect_ratios[gid.z];
+    box_width = param.minSize * sqrt(ar) / 2;
+    box_height = param.minSize / sqrt(ar) / 2;
+    float4 box;
+    box.x = (center_x - box_width) / param.imageWidth;
+    box.y = (center_y - box_height) / param.imageHeight;
+    box.z = (center_x + box_width) / param.imageWidth;
+    box.w = (center_y + box_height) / param.imageHeight;
+
+    float4 res;
+    if (param.clip) {
+      res = fmin(fmax(box, 0.0), 1.0);
+    } else {
+      res = box;
+    }
+
+    outBoxTexture.write(half4(res), gid.xy, gid.z);
+  } else if (gid.z >= param.aspecRatiosSize) {
+    if (param.maxSizeSize > 0) {
+      box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
+      float4 max_box;
+      max_box.x = (center_x - box_width) / param.imageWidth;
+      max_box.y = (center_y - box_height) / param.imageHeight;
+      max_box.z = (center_x + box_width) / param.imageWidth;
+      max_box.w = (center_y + box_height) / param.imageHeight;
+
+      float4 res;
+      if (param.clip) {
+        res = min(max(max_box, 0.0), 1.0);
+      } else {
+        res = max_box;
+      }
+      outBoxTexture.write(half4(res), gid.xy, gid.z);  // res, not max_box, so param.clip is honoured
+    }
+  }
+
+  float4 variance = variances[0];
+  if (gid.z < param.numPriors) {
+    float4 variances_output;
+    variances_output.x = variance.x;
+    variances_output.y = variance.y;
+    variances_output.z = variance.z;
+    variances_output.w = variance.w;
+    varianceTexture.write(half4(variances_output), gid.xy, gid.z);
+  }
+}
+
+// Slice order: min-size box (slice 0), optional max-size box (slice 1), then one box per
+// aspect ratio with the ratio index advanced past entries equal to 1.
+kernel void prior_box_MinMaxAspectRatiosOrder(texture2d_array inTexture [[texture(0)]],
+                                              texture2d_array outBoxTexture [[texture(1)]],
+                                              texture2d_array varianceTexture [[texture(2)]],
+                                              const device float *aspect_ratios [[buffer(0)]],
+                                              constant PriorBoxMetalParam &param [[buffer(1)]],
+                                              const device float4 *variances [[buffer(2)]],
+                                              uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outBoxTexture.get_width() ||
+      gid.y >= outBoxTexture.get_height() ||
+      gid.z >= outBoxTexture.get_array_size()) return;
+
+  float center_x = (gid.x + param.offset) * param.stepWidth;
+  float center_y = (gid.y + param.offset) * param.stepHeight;
+
+  float box_width, box_height;
+
+
+  // Slice 0: square box with half-extent minSize / 2.
+  if (gid.z == 0) {
+    box_width = box_height = param.minSize / 2;
+
+    float4 box;
+    box.x = (center_x - box_width) / param.imageWidth;
+    box.y = (center_y - box_height) / param.imageHeight;
+    box.z = (center_x + box_width) / param.imageWidth;
+    box.w = (center_y + box_height) / param.imageHeight;
+
+    float4 res;
+    if (param.clip) {
+      res = fmin(fmax(box, 0.0), 1.0);
+    } else {
+      res = box;
+    }
+
+    outBoxTexture.write(res, gid.xy, gid.z);
+  }
+
+  if (gid.z == 1 && param.maxSizeSize > 0) {
+    // Slice 1: square box with half-extent sqrt(minSize * maxSize) / 2.
+    box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
+    float4 max_box;
+    max_box.x = (center_x - box_width) / param.imageWidth;
+    max_box.y = (center_y - box_height) / param.imageHeight;
+    max_box.z = (center_x + box_width) / param.imageWidth;
+    max_box.w = (center_y + box_height) / param.imageHeight;
+
+    float4 res;
+    if (param.clip) {
+      res = min(max(max_box, 0.0), 1.0);
+    } else {
+      res = max_box;
+    }
+    outBoxTexture.write(res, gid.xy, gid.z);
+  }
+
+  int aspect_to = 0;
+  if (param.maxSizeSize > 0) {
+    aspect_to = gid.z - 2;
+  } else {
+    aspect_to = gid.z - 1;
+  }
+
+
+
+  // The remaining slices index the aspect ratios; the leading one or two slices are handled above.
+  if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) {
+    // A ratio of 1 reproduces the slice-0 box, so shift the index past every such entry seen so far.
+    int skip = 0;
+    for (int i = 0; i < aspect_to + 1; ++i) {
+      if (fabs(aspect_ratios[i] - 1.) < 1e-6) {
+        skip += 1;
+      }
+    }
+    aspect_to += skip;
+    // NOTE(review): the shifted index is not re-checked against aspecRatiosSize - confirm the grid depth.
+    float ar = aspect_ratios[aspect_to];
+
+    box_width = param.minSize * sqrt(ar) / 2;
+    box_height = param.minSize / sqrt(ar) / 2;
+    float4 box;
+    box.x = (center_x - box_width) / param.imageWidth;
+    box.y = (center_y - box_height) / param.imageHeight;
+    box.z = (center_x + box_width) / param.imageWidth;
+    box.w = (center_y + box_height) / param.imageHeight;
+
+    float4 res;
+    if (param.clip) {
+      res = fmin(fmax(box, 0.0), 1.0);
+    } else {
+      res = box;
+    }
+
+    outBoxTexture.write(res, gid.xy, gid.z);
+  }
+
+  float4 variance = variances[0];
+  if (gid.z < param.numPriors) {
+    float4 variances_output;
+    variances_output.x = variance.x;
+    variances_output.y = variance.y;
+    variances_output.z = variance.z;
+    variances_output.w = variance.w;
+    varianceTexture.write(variances_output, gid.xy, gid.z);
+  }
+}
+
+// Half-output variant of prior_box_MinMaxAspectRatiosOrder; slice layout matches the float kernel.
+kernel void prior_box_MinMaxAspectRatiosOrder_half(texture2d_array inTexture [[texture(0)]],
+                                                   texture2d_array outBoxTexture [[texture(1)]],
+                                                   texture2d_array varianceTexture [[texture(2)]],
+                                                   const device half *aspect_ratios [[buffer(0)]],
+                                                   constant PriorBoxMetalParam &param [[buffer(1)]],
+                                                   const device float4 *variances [[buffer(2)]],
+                                                   uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outBoxTexture.get_width() ||
+      gid.y >= outBoxTexture.get_height() ||
+      gid.z >= outBoxTexture.get_array_size()) return;
+
+  float center_x = (gid.x + param.offset) * param.stepWidth;
+  float center_y = (gid.y + param.offset) * param.stepHeight;
+
+  float box_width, box_height;
+
+  if (gid.z == 0) {
+    box_width = box_height = param.minSize / 2;
+
+    float4 box;
+    box.x = (center_x - box_width) / param.imageWidth;
+    box.y = (center_y - box_height) / param.imageHeight;
+    box.z = (center_x + box_width) / param.imageWidth;
+    box.w = (center_y + box_height) / param.imageHeight;
+
+    float4 res;
+    if (param.clip) {
+      res = fmin(fmax(box, 0.0), 1.0);
+    } else {
+      res = box;
+    }
+
+    outBoxTexture.write(half4(res), gid.xy, gid.z);
+  }
+
+  if (gid.z == 1 && param.maxSizeSize > 0) {
+    box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
+    float4 max_box;
+    max_box.x = (center_x - box_width) / param.imageWidth;
+    max_box.y = (center_y - box_height) / param.imageHeight;
+    max_box.z = (center_x + box_width) / param.imageWidth;
+    max_box.w = (center_y + box_height) / param.imageHeight;
+
+    float4 res;
+    if (param.clip) {
+      res = min(max(max_box, 0.0), 1.0);
+    } else {
+      res = max_box;
+    }
+    outBoxTexture.write(half4(res), gid.xy, gid.z);
+  }
+
+  int aspect_to = 0;
+  if (param.maxSizeSize > 0) {
+    aspect_to = gid.z - 2;
+  } else {
+    aspect_to = gid.z - 1;
+  }
+
+  if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) {
+    // Same index shift as the float kernel: step past every ratio equal to 1 seen so far.
+    int skip = 0;
+    for (int i = 0; i < aspect_to + 1; ++i) {
+      if (fabs(aspect_ratios[i] - 1.) < 1e-6) skip += 1;
+    }
+    aspect_to += skip;
+    float ar = aspect_ratios[aspect_to];
+    box_width = param.minSize * sqrt(ar) / 2;
+    box_height = param.minSize / sqrt(ar) / 2;
+    float4 box;
+    box.x = (center_x - box_width) / param.imageWidth;
+    box.y = (center_y - box_height) / param.imageHeight;
+    box.z = (center_x + box_width) / param.imageWidth;
+    box.w = (center_y + box_height) / param.imageHeight;
+    float4 res;
+    if (param.clip) {
+      res = fmin(fmax(box, 0.0), 1.0);
+    } else {
+      res = box;
+    }
+    outBoxTexture.write(half4(res), gid.xy, gid.z);
+  }
+
+  float4 variance = variances[0];
+  if (gid.z < param.numPriors) {
+    float4 variances_output;
+    variances_output.x = variance.x;
+    variances_output.y = variance.y;
+    variances_output.z = variance.z;
+    variances_output.w = variance.w;
+    varianceTexture.write(half4(variances_output), gid.xy, gid.z);
+  }
+}
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..e725440bbe997d571f1860bce323516144a94da8
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#include
+using namespace metal;
+
+// ReLU on a half texture: every lane of the texel at gid becomes max(lane, 0).
+kernel void relu_half(texture2d_array inTexture [[texture(0)]],
+                      texture2d_array outTexture [[texture(1)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outTexture.get_width() ||
+      gid.y >= outTexture.get_height() ||
+      gid.z >= outTexture.get_array_size()) return;
+  // Texels are fetched with read(), so no sampler object is declared.
+  const half4 input = inTexture.read(gid.xy, gid.z);
+  const float4 relu = fmax((float4)input, 0.0);
+  outTexture.write(half4(relu), gid.xy, gid.z);
+}
+// ReLU on a float texture: every lane of the texel at gid becomes max(lane, 0).
+kernel void relu(texture2d_array inTexture [[texture(0)]],
+                 texture2d_array outTexture [[texture(1)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outTexture.get_width() ||
+      gid.y >= outTexture.get_height() ||
+      gid.z >= outTexture.get_array_size()) return;
+  // Texels are fetched with read(), so no sampler object is declared.
+  const float4 input = inTexture.read(gid.xy, gid.z);
+  const float4 relu = fmax(input, 0.0);
+  outTexture.write(relu, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
new file mode 100644
index 0000000000000000000000000000000000000000..7583537c2b404b7a95eeedfb4c69793a608f18ac
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
@@ -0,0 +1,66 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#ifdef P
+// Textual template: the including file defines P, RIN and ROUT before each #include of this file.
+#define CONCAT2(a, b) a ## b
+#define CONCAT2_(a, b) a ## _ ## b
+#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
+#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
+// FUNC pastes the kernel name f_r1_r2_p, VECTOR pastes P with a lane count, FUNC_R pastes f_r.
+#define FUNC(f, r1, r2, p) CONCAT4_(f, r1, r2, p)
+#define VECTOR(p, n) CONCAT2(p, n)
+#define FUNC_R(f, r) CONCAT2_(f, r)
+// reshape_<RIN>_<ROUT>_<P>: each output lane gets the input element with the same linear index.
+kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array inTexture [[texture(0)]],
+                                        texture2d_array outTexture [[texture(1)]],
+                                        constant ReshapeParam &rp [[buffer(0)]],
+                                        uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outTexture.get_width() ||
+      gid.y >= outTexture.get_height() ||
+      gid.z >= outTexture.get_array_size()) return;
+  // oxyzn holds (gid.x, gid.y, gid.z, lane); the conversion helpers are declared outside this file.
+  int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4], iabcd[4];
+  ReshapeParam lrp = rp;
+  int oC = lrp.odim[lrp.otrans[3]];
+  int iC = lrp.idim[lrp.itrans[3]];
+  int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3];
+  VECTOR(P, 4) r;
+  for (int n = 0; n < 4; n++) {
+    oxyzn[3] = n;
+#if ROUT == 4
+    xyzn2abcd_4(oC, oxyzn, oabcd);
+#else
+    FUNC_R(xyzn2abcd, ROUT)(oxyzn, oabcd);
+#endif
+    int tabcd[4];
+    invtrans(lrp.otrans, oabcd, tabcd);
+    int index = abcd2index(lrp.odim, tabcd);
+    if (index < count) {
+      index2abcd(lrp.idim, index, tabcd);
+      trans(lrp.itrans, tabcd, iabcd);
+#if RIN == 4
+      abcd2xyzn_4(iC, iabcd, ixyzn);
+#else
+      FUNC_R(abcd2xyzn, RIN)(iabcd, ixyzn);
+#endif
+      r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
+    } else {
+      r[n] = 0;
+    }
+  }
+  outTexture.write(r, gid.xy, gid.z);
+}
+
+#endif
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..d2f5815d422ec8c4f3e1e3c1992855547e002264
--- /dev/null
+++
b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.metal
@@ -0,0 +1,150 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include
+#include "Common.metal"
+
+using namespace metal;
+// Reshape kernel parameters: idim/odim are read as dimension arrays, itrans/otrans as index permutations.
+struct ReshapeParam {
+  int32_t idim[4];
+  int32_t itrans[4];
+  int32_t odim[4];
+  int32_t otrans[4];
+};
+// Instantiates reshape_<RIN>_<ROUT>_<P> for RIN, ROUT in {4, 3, 2, 1} and P in {float, half}.
+#define P float
+#define RIN 4
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+
+#define RIN 3
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+
+#define RIN 2
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+
+#define RIN 1
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+
+#undef P
+
+#define P half
+#define RIN 4
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+
+#define RIN 3
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+
+#define RIN 2
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+
+#define RIN 1
+#define ROUT 4
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 3
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 2
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#define ROUT 1
+#include "ReshapeKernel.inc.metal"
+#undef ROUT
+#undef RIN
+#undef P
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ResizeBilinear.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ResizeBilinear.metal
new file mode 100644
index 0000000000000000000000000000000000000000..fbb4e12cb82c12f8dc5b94c397e43b8c8c5ae518
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ResizeBilinear.metal
@@ -0,0 +1,75 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include
+using namespace metal;
+
+struct resize_bilinear_param {
+  // Scale factors mapping an output texel index to a source coordinate
+  // (coordinate = index * ratio); explicit out_h / out_w fields are disabled.
+  float ratio_h;
+  float ratio_w;
+};
+// Bilinear resize: equal-sized textures are copied texel for texel, otherwise four neighbours are blended.
+kernel void resize_bilinear(texture2d_array input [[texture(0)]],
+                            texture2d_array output [[texture(2)]],
+                            constant resize_bilinear_param & pm [[buffer(0)]],
+                            uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= output.get_width() || gid.y >= output.get_height() || gid.z >= output.get_array_size()) return;
+  float4 r;
+  if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
+    r = input.read(gid.xy, gid.z);
+  } else {
+    float w = gid.x * pm.ratio_w;
+    float h = gid.y * pm.ratio_h;
+    uint w0 = w, h0 = h;
+    uint w1 = w0 + 1, h1 = h0 + 1;
+    float w1lambda = w - w0, h1lambda = h - h0;
+    float w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
+    if (w1 >= input.get_width()) w1 = w0;
+    if (h1 >= input.get_height()) h1 = h0;
+    float4 r0 = input.read(uint2(w0, h0), gid.z);
+    float4 r1 = input.read(uint2(w1, h0), gid.z);
+    float4 r2 = input.read(uint2(w0, h1), gid.z);
+    float4 r3 = input.read(uint2(w1, h1), gid.z);
+    r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
+  }
+  output.write(r, gid.xy, gid.z);
+}
+// Half-texture variant: same blend, with the interpolation weights held in half.
+kernel void resize_bilinear_half(texture2d_array input [[texture(0)]],
+                                 texture2d_array output [[texture(2)]],
+                                 constant resize_bilinear_param & pm [[buffer(0)]],
+                                 uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= output.get_width() || gid.y >= output.get_height() || gid.z >= output.get_array_size()) return;
+  half4 r;
+  if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
+    r = input.read(gid.xy, gid.z);
+  } else {
+    float w = gid.x * pm.ratio_w;  // float: half cannot hold every texel index above 2048 exactly
+    float h = gid.y * pm.ratio_h;
+    uint w0 = w, h0 = h;
+    uint w1 = w0 + 1, h1 = h0 + 1;
+    half w1lambda = w - w0, h1lambda = h - h0;
+    half w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
+    if (w1 >= input.get_width()) w1 = w0;
+    if (h1 >= input.get_height()) h1 = h0;
+    half4 r0 = input.read(uint2(w0, h0), gid.z);
+    half4 r1 = input.read(uint2(w1, h0), gid.z);
+    half4 r2 = input.read(uint2(w0, h1), gid.z);
+    half4 r3 = input.read(uint2(w1, h1), gid.z);
+    r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
+  }
+  output.write(r, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Shape.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Shape.metal
new file mode 100644
index 0000000000000000000000000000000000000000..b50d5547193ccc9a1bef1b3ed6bbd1b7a64c3527
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Shape.metal
@@ -0,0 +1,21 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#include
+using namespace metal;
+// Empty kernels: both entry points take no arguments and perform no work.
+kernel void shape() {
+}
+kernel void shape_half() {
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.inc.metal
new file mode 100644
index 0000000000000000000000000000000000000000..455cf1471b5c369fc27040e03b57812e8d6bf0e8
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.inc.metal
@@ -0,0 +1,61 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#ifdef P
+// Textual template: the including file defines P (element type) and SoftmaxParam before the #include.
+#define CONCAT2(a, b) a ## b
+#define CONCAT2_(a, b) a ## _ ## b
+// FUNC pastes the kernel name f_p; VECTOR pastes P with a lane count.
+#define FUNC(f, p) CONCAT2_(f, p)
+#define VECTOR(p, n) CONCAT2(p, n)
+// softmax_<P>: exp(v - rowmax) / sum over the first sp.K values of row gid.y (four per texel, slice 0).
+kernel void FUNC(softmax, P)(texture2d_array inTexture [[texture(0)]],
+                             texture2d_array outTexture [[texture(1)]],
+                             constant SoftmaxParam &sp [[buffer(0)]],
+                             uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outTexture.get_width() ||
+      gid.y >= outTexture.get_height() ||
+      gid.z >= outTexture.get_array_size()) return;
+//  int zsize = inTexture.get_array_size();
+  P maxv = inTexture.read(uint2(0, gid.y), 0)[0];
+  int group = sp.K / 4;
+  int remain = sp.K % 4;
+  for (int x = 0; x < group; x++) {
+    VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
+    maxv = max(maxv, max(r[0], max(r[1], max(r[2], r[3]))));
+  }
+  if (remain > 0) {
+    VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
+    for (int i = 0; i < remain; i++) {
+      maxv = max(maxv, r[i]);
+    }
+  }
+  VECTOR(P, 4) rsum = {0, 0, 0, 0};
+  for (int x = 0; x < group; x++) {
+    VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
+    rsum += exp(r - maxv);
+  }
+  P sum = rsum[0] + rsum[1] + rsum[2] + rsum[3];
+  if (remain > 0) {
+    VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
+    for (int i = 0; i < remain; i++) {
+      sum += exp(r[i] - maxv);
+    }
+  }
+  VECTOR(P, 4) rr = inTexture.read(gid.xy, gid.z);
+  rr = exp(rr - maxv) / sum;
+  outTexture.write(rr, gid.xy, gid.z);
+}
+
+#endif
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal
new file mode 100644
index 0000000000000000000000000000000000000000..67c279a4441095e710985c65d85aac589b7d0f54
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal
@@ -0,0 +1,29 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include
+using namespace metal;
+// Parameters for the softmax kernels generated from Softmax.inc.metal.
+struct SoftmaxParam {
+  int N;  // not read by the kernel body in Softmax.inc.metal
+  int K;  // number of values per row that take part in the max and the sum
+};
+// Instantiate softmax_float and softmax_half from the shared template.
+#define P float
+#include "Softmax.inc.metal"
+#undef P
+
+#define P half
+#include "Softmax.inc.metal"
+#undef P
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.inc.metal
new file mode 100644
index 0000000000000000000000000000000000000000..54e3f21e793a9c1474f13fed61857211cb7d117f
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.inc.metal
@@ -0,0 +1,122 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#ifdef P
+// Textual template: the including file defines P, R, N and V (compared with VX / VY / VZ) first.
+#define CONCAT2(a, b) a ## b
+#define CONCAT2_(a, b) a ## _ ## b
+#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
+#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
+#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
+// FUNC pastes the kernel name f_r_n_v_p; VECTOR pastes P with a lane count.
+#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
+#define VECTOR(p, n) CONCAT2(p, n)
+#define FUNC_R(f, r) CONCAT2_(f, r)
+// VV is the axis suffix that goes into the kernel name.
+#if V == VX
+#define VV x
+#elif V == VY
+#define VV y
+#elif V == VZ
+#define VV z
+#else
+#define VV normal
+#endif
+// split_<R>_<N>_y_<P>: routes each input texel to out1..outN according to its y coordinate.
+#if V == VY
+kernel void FUNC(split, R, N, VV, P)(texture2d_array input [[texture(0)]],
+                                     texture2d_array out1 [[texture(1)]],
+                                     texture2d_array out2 [[texture(2)]],
+#if N >= 3
+                                     texture2d_array out3 [[texture(3)]],
+#endif // N >= 3
+#if N >= 4
+                                     texture2d_array out4 [[texture(4)]],
+#endif // N >= 4
+                                     constant SplitParam &sp [[buffer(0)]],
+                                     uint3 gid [[thread_position_in_grid]]) {
+  // NOTE(review): sp.offset shifts y, yet out1 is written at gid.xy rather than (gid.x, y) - confirm intent.
+  VECTOR(P, 4) r = input.read(gid.xy, gid.z);
+  int y = gid.y - sp.offset;
+  if (y < sp.vdim[0]) {
+    out1.write(r, gid.xy, gid.z);
+    return;
+  }
+  y -= sp.vdim[0];
+  if (y < sp.vdim[1]) {
+    out2.write(r, uint2(gid.x, y), gid.z);
+    return;
+  }
+#if N >= 3
+  y -= sp.vdim[1];
+  if (y < sp.vdim[2]) {
+    out3.write(r, uint2(gid.x, y), gid.z);
+    return;
+  }
+#endif // N >= 3
+#if N >= 4
+  y -= sp.vdim[2];
+  if (y < sp.vdim[3]) {
+    out4.write(r, uint2(gid.x, y), gid.z);
+    return;
+  }
+#endif // N >= 4
+}
+#endif // V == VY
+
+// split_<R>_<N>_x_<P>: same routing along x; sp.offset is not applied in this variant.
+#if V == VX
+kernel void FUNC(split, R, N, VV, P)(texture2d_array input [[texture(0)]],
+                                     texture2d_array out1 [[texture(1)]],
+                                     texture2d_array out2 [[texture(2)]],
+#if N >= 3
+                                     texture2d_array out3 [[texture(3)]],
+#endif // N >= 3
+#if N >= 4
+                                     texture2d_array out4 [[texture(4)]],
+#endif // N >= 4
+                                     constant SplitParam &sp [[buffer(0)]],
+                                     uint3 gid [[thread_position_in_grid]]) {
+  VECTOR(P, 4) r = input.read(gid.xy, gid.z);
+  int x = gid.x;
+  if (x < sp.vdim[0]) {
+    out1.write(r, gid.xy, gid.z);
+    return;
+  }
+  x -= sp.vdim[0];
+  if (x < sp.vdim[1]) {
+    out2.write(r, uint2(x, gid.y), gid.z);
+    return;
+  }
+#if N >= 3
+  x -= sp.vdim[1];
+  if (x < sp.vdim[2]) {
+    out3.write(r, uint2(x, gid.y), gid.z);
+    return;
+  }
+#endif // N >= 3
+#if N >= 4
+  x -= sp.vdim[2];
+  if (x < sp.vdim[3]) {
+    out4.write(r, uint2(x, gid.y), gid.z);
+    return;
+  }
+#endif // N >= 4
+}
+#endif // V == VX
+
+
+
+#undef VV
+#endif
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.metal
new file mode 100644
index 0000000000000000000000000000000000000000..4c1e818d2bf5c7266169f406fbfaf8e322685dc4
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Split.metal
@@ -0,0 +1,64 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#include <metal_stdlib>
+#include "Common.metal"
+
+using namespace metal;
+
+struct SplitParam {
+    int32_t idim[4];   // not read by the kernels in Split.inc.metal; presumably the input dims - confirm against the host-side struct
+    int32_t axis;      // not read by the kernels in Split.inc.metal
+    int32_t offset;    // subtracted from gid.y by the y-axis kernel
+    int32_t trans[4];  // not read by the kernels in Split.inc.metal
+    int32_t vdim[4];   // extent of each output along the split axis; the kernels walk these in order
+};
+// Axis tags compared by the #if V == ... tests in Split.inc.metal.
+#define VNORMAL 1
+#define VX 2
+#define VY 3
+#define VZ 4
+
+// only support split_{2, 3, 4}_{2, 3, 4}_y_{float, half}
+// only support split_{3, 4}_{2, 3, 4}_x_{float, half}
+
+// Instantiated below: split_3_2_y_{float, half} and split_2_2_y_{float, half}.
+//// ssd-ar: (R=3, N=2, V=y)
+#define V VY
+    #define R 3
+    #define N 2
+    #define P float
+    #include "Split.inc.metal"
+    #undef P
+    #define P half
+    #include "Split.inc.metal"
+    #undef P
+    #undef N
+    #undef R
+#undef V
+
+
+//// ssd-ar: (R=2, N=2, V=y)
+#define V VY
+    #define R 2
+    #define N 2
+    #define P float
+    #include "Split.inc.metal"
+    #undef P
+    #define P half
+    #include "Split.inc.metal"
+    #undef P
+    #undef N
+    #undef R
+#undef V
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.inc.metal
new file mode 100644
index 0000000000000000000000000000000000000000..534166e45fc3db49cc5de526ec0d5179ca3f9899
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.inc.metal
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+// Kernel template for "transpose": the including file defines R (2, 3 or 4) and P (float or half) before each #include.
+#ifdef P
+
+#define CONCAT2(a, b) a ## b
+#define CONCAT2_(a, b) a ## _ ## b
+#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
+// FUNC builds the kernel name transpose_<R>_<P>; VECTOR(P, 4) expands to P4 (float4 / half4).
+#define FUNC(f, r, p) CONCAT3_(f, r, p)
+#define VECTOR(p, n) CONCAT2(p, n)
+// Gathers the output texel at gid one lane at a time: output coords -> oabcd -> permuted by pm.axis into iabcd -> input coords.
+kernel void FUNC(transpose, R, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
+                                  texture2d_array<P, access::write> outTexture [[texture(1)]],
+                                  constant TransposeParam &pm [[buffer(0)]],
+                                  uint3 gid [[thread_position_in_grid]]) {
+    VECTOR(P, 4) r;
+    int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}; // output texel position; [3] is the lane, set per iteration
+    int iabcd[4], oabcd[4], ixyzn[4];
+    for (int n = 0; n < 4; n++) {
+        oxyzn[3] = n;
+#if R == 4
+        xyzn2abcd_4(pm.oC, oxyzn, oabcd); // must fill oabcd, as the R == 3 / R == 2 branches do: oabcd is what the permutation below reads
+#endif // R == 4
+#if R == 3
+        xyzn2abcd_3(oxyzn, oabcd);
+#endif // R == 3
+#if R == 2
+        xyzn2abcd_2(oxyzn, oabcd);
+#endif // R == 2
+        iabcd[pm.axis[0]] = oabcd[0]; // scatter: output index k lands in input slot pm.axis[k]
+        iabcd[pm.axis[1]] = oabcd[1];
+        iabcd[pm.axis[2]] = oabcd[2];
+        iabcd[pm.axis[3]] = oabcd[3];
+#if R == 4
+        abcd2xyzn_4(pm.iC, iabcd, ixyzn);
+#endif // R == 4
+#if R == 3
+        abcd2xyzn_3(iabcd, ixyzn);
+#endif // R == 3
+#if R == 2
+        abcd2xyzn_2(iabcd, ixyzn);
+#endif // R == 2
+        r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]]; // pick the single source lane for output lane n
+    }
+    outTexture.write(r, gid.xy, gid.z);
+}
+
+#endif
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..321663b9b7f09eba2041cb0932215d291e44aba6
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/TransposeKernel.metal
@@ -0,0 +1,63 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <metal_stdlib>
+#include "Common.metal"
+using namespace metal;
+
+struct TransposeParam {
+    int iC;       // passed to abcd2xyzn_4 by the rank-4 transpose kernel
+    int oC;       // passed to xyzn2abcd_4 by the rank-4 transpose kernel
+    int axis[4];  // axis[k] is the input slot that receives output index k
+};
+// Straight texel copies; pm is bound but not read.
+kernel void transpose_copy_float(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                                 texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                 constant TransposeParam &pm [[buffer(0)]],
+                                 uint3 gid [[thread_position_in_grid]]) {
+    outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
+}
+kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                                texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                constant TransposeParam &pm [[buffer(0)]],
+                                uint3 gid [[thread_position_in_grid]]) {
+    outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
+}
+// Instantiates transpose_<R>_<P> for R in {4, 3, 2} and P in {float, half}.
+#define R 4
+    #define P float
+    #include "TransposeKernel.inc.metal"
+    #undef P
+    #define P half
+    #include "TransposeKernel.inc.metal"
+    #undef P
+#undef R
+
+#define R 3
+    #define P float
+    #include "TransposeKernel.inc.metal"
+    #undef P
+    #define P half
+    #include "TransposeKernel.inc.metal"
+    #undef P
+#undef R
+
+#define R 2
+    #define P float
+    #include "TransposeKernel.inc.metal"
+    #undef P
+    #define P half
+    #include "TransposeKernel.inc.metal"
+    #undef P
+#undef R
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..fc1b3164c9cf623a1bc4d350cc8a5f72c369bae4
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift
@@ -0,0 +1,69 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class MulticlassNMSParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope) + bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope) + output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope) + + middleOutput = FetchHolder.init(inCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim.dims) + + bboxOutput = FetchHolder.init(inCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim.dims) + } catch let error { + throw error + } + } + var bboxOutput: FetchHolder + var middleOutput: FetchHolder + let scores: Texture

+ let bboxes: Texture

+ var output: Texture

+} + +class MulticlassNMSOp: Operator, MulticlassNMSParam

>, Runable, Creator, InferShaperable{ + + func inputVariant() -> [String : [Variant]] { + return ["Scores" : [para.middleOutput], "BBoxes" : [para.bboxOutput]] + } + + func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let _ { + fatalError() + } + } + + func inferShape() { + // para.output.dim = para.input.dim + } + + typealias OpType = MulticlassNMSOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + + } + + func delogOutput() { + print(" nms - output: ") + print(para.bboxes.metalTexture.float32Array().strideArray()) + } +} + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift index 07676defe71ec18560df4be630cd04008cd1aad6..6f42f2aa9f8d0515946ace625ed16c5040fd3099 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift @@ -15,54 +15,60 @@ import Foundation class PoolParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope) - poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs) - ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs) - stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs) - padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs) - ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs) - globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs) - } catch let error { - throw error - } -// let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self) + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope) + poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs) + ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs) + stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs) + padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs) + ceilMode = try 
PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs) + globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs) + assert(input.transpose == [0, 2, 3, 1]) + } catch let error { + throw error } - let input: Texture

- var output: Texture

- var ksize: [Int32] - var stride: [Int32] - var padding: [Int32] - var poolType: String - var ceilMode: Bool - var globalPooling: Bool + // let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self) + } + let input: Texture

+ var output: Texture

+ var ksize: [Int32] + var stride: [Int32] + var padding: [Int32] + var poolType: String + var ceilMode: Bool + var globalPooling: Bool } class PoolOp: Operator, PoolParam

>, Runable, Creator, InferShaperable{ - - func inferShape() { - // para.output.dim = para.input.dim - } - - typealias OpType = PoolOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } + + typealias OpType = PoolOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } + + func delogOutput() { + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + - func delogOutput() { - print("pool2d delog") - let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true) - print(para.ksize) - print(para.stride) - print(para.padding) - print(para.poolType) - let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true) - } +// print("pool2d delog") +// let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true) +// print(para.ksize) +// print(para.stride) +// print(para.padding) +// print(para.poolType) +// let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..2d7987e937b9ddf6410ebb0d23bb89c76c1a13ce --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift @@ -0,0 +1,65 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class PreluParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope) + alpha = try PreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) + mode = try PreluParam.getAttr(key: "mode", attrs: opDesc.attrs) + } catch let error { + throw error + } + } + let mode: String + let alpha: Tensor

+ let input: Texture

+ var output: Texture

+} + +class PreluOp: Operator, PreluParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = PreluOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) input: ") + print(para.input.metalTexture.toTensor(dim: (n: para.input.padToFourDim[0], c: para.input.padToFourDim[1], h: para.input.padToFourDim[2], w: para.input.padToFourDim[3])).strideArray()) + + print(" \(type) Alpha: ") + let _: Float32? = para.alpha.buffer.logDesc(header: " alpha: ", stridable: false) + + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) + } + +// print("softmax delog") +// let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false) +// let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false) +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..2a9f18463483a024545300661e1db33cedce585b --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift @@ -0,0 +1,124 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Foundation + +class PriorBoxParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs) + } catch _ { + } + + do { + input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope) + output = try PriorBoxParam.outputBoxes(outputs: opDesc.outputs, from: inScope) + inputImage = try PriorBoxParam.inputImage(inputs: opDesc.inputs, from: inScope) + outputVariances = try PriorBoxParam.outputVariances(outputs: opDesc.outputs, from: inScope) + minSizes = try PriorBoxParam.getAttr(key: "min_sizes", attrs: opDesc.attrs) + maxSizes = try PriorBoxParam.getAttr(key: "max_sizes", attrs: opDesc.attrs) + aspectRatios = try PriorBoxParam.getAttr(key: "aspect_ratios", attrs: opDesc.attrs) + variances = try PriorBoxParam.getAttr(key: "variances", attrs: opDesc.attrs) + flip = try PriorBoxParam.getAttr(key: "flip", attrs: opDesc.attrs) + clip = try PriorBoxParam.getAttr(key: "clip", attrs: opDesc.attrs) + stepW = try PriorBoxParam.getAttr(key: "step_w", attrs: opDesc.attrs) + stepH = try PriorBoxParam.getAttr(key: "step_h", attrs: opDesc.attrs) + offset = try PriorBoxParam.getAttr(key: "offset", attrs: opDesc.attrs) + } catch let error { + throw error + } + } + + var min_max_aspect_ratios_order: Bool = false + let minSizes: [Float32] + let maxSizes: [Float32] + let aspectRatios: [Float32] + var newAspectRatios: MTLBuffer? + let variances: [Float32] + let flip: Bool + let clip: Bool + var stepW: Float32 + var stepH: Float32 + let offset: Float32 + + let input: Texture

+ let inputImage: Texture

+ var output: Texture

+ let outputVariances: Texture

+} + +class PriorBoxOp: Operator, PriorBoxParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = PriorBoxOp

+ + func inferShape() { + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + + print(" \(type) output: ") + // output +// let outputArray = para.output.metalTexture.float32Array() +// print(outputArray.strideArray()) +// let device = para.input.metalTexture!.device +// let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3]) +// let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3]) +// print("boxes: ") +// print(boxes.strideArray()) +// print("variances: ") +// print(variances.strideArray()) + // output + print(" \(type) output: ") + + let box = para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3])) + print(" dim: \(para.output.dim)") + print(box.strideArray()) +// print((0.. Float32 in +// return o +// } +// +// print(" output variance: \(outputVarianceArray)") + +// writeToLibrary(fileName: "variance_out", array: outputVarianceArray) + + } +} + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift index f65e402cdd2b6356199a2104f99556cd4fdd3b6a..7748df75fef3a2280a51dda159ead0392e146443 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift @@ -1,47 +1,58 @@ -///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. */ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + import Foundation class ReluParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error - } + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope) + } catch let error { + throw error } - let input: Texture

- var output: Texture

+ } + let input: Texture

+ var output: Texture

} class ReluOp: Operator, ReluParam

>, Runable, Creator, InferShaperable{ - - func inferShape() { - para.output.dim = para.input.dim - } - - typealias OpType = ReluOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } + + typealias OpType = ReluOp

+ + func inferShape() { + para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } + + func delogOutput() { + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) + } + } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift index 759ffd4b8b46673e5245f8bbc67dbcc0956666aa..ac46baca91bd6eedab9241da68a05d08391ec931 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift @@ -15,36 +15,63 @@ import Foundation class ReshapeParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope) + shape = try ReshapeParam.getAttr(key: "shape", attrs: opDesc.attrs) + + var s: [Int] = shape.map { Int($0) } + + var di = -1 + var ml = 1 + for i in 0..= 0 { + s[di] = input.dim.numel() / ml + } + output.tensorDim = Dim.init(inDim: s) + var dim: [Int] = [1, 1, 1, 1] + for i in 0.. - var output: Texture

+ } + let input: Texture

+ let shape: [Int32] + var output: Texture

} class ReshapeOp: Operator, ReshapeParam

>, Runable, Creator, InferShaperable{ - - func inferShape() { - // para.output.dim = para.input.dim - } - - typealias OpType = ReshapeOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - func delogOutput() { - print("reshape delog") - let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false) - let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false) + + typealias OpType = ReshapeOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } + func delogOutput() { + print("reshape delog") + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) +// print(outputArray) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ResizeBilinearOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ResizeBilinearOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..e0e699cdb8b3a17eb109877f1a7bd986b5e07403 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/ResizeBilinearOp.swift @@ -0,0 +1,64 @@ +///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
*/ + +import Foundation + +class ResizeBilinearParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope) +// if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) { +// fatalError() +// } + output = try ResizeBilinearParam.outputOut(outputs: opDesc.outputs, from: inScope) + out_h = try ResizeBilinearParam.getAttr(key: "out_h", attrs: opDesc.attrs) + out_w = try ResizeBilinearParam.getAttr(key: "out_w", attrs: opDesc.attrs) + } catch let error { + throw error + } + } + let input: Texture

+ var output: Texture

+ let out_h: Int32 + let out_w: Int32 +} + +class ResizeBilinearOp: Operator, ResizeBilinearParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = ResizeBilinearOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + } + +} + + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ShapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ShapeOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..b37eed0a9d398923bb866444cf224cb79bb2fecc --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/ShapeOp.swift @@ -0,0 +1,57 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class ShapeParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope) + output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope) + } catch let error { + throw error + } + } + var output: Texture

+ let input: Texture

+} + +class ShapeOp: Operator, ShapeParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = ShapeOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + } + +} + + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift index d323b21cfa7729876a78702d0098c267132b4ab1..66b5c7b3146d4c433e12b846a971e4b5ae579f79 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift @@ -15,36 +15,48 @@ import Foundation class SoftmaxParam: OpParam { - typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { - do { - input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) - output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) - } catch let error { - throw error - } + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) + + assert(input.tensorDim.dims.count == 2) + assert(input.transpose == [0, 1, 2, 3]) + + output.dim = input.dim + output.tensorDim = input.tensorDim + output.padToFourDim = input.padToFourDim + } catch let error { + throw error } - let input: Texture

- var output: Texture

+ } + let input: Texture

+ var output: Texture

} class SoftmaxOp: Operator, SoftmaxParam

>, Runable, Creator, InferShaperable{ - - func inferShape() { - // para.output.dim = para.input.dim + typealias OpType = SoftmaxOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error } + } + + func delogOutput() { + print("softmax delog") + print(para.input) - typealias OpType = SoftmaxOp

- func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } - } - func delogOutput() { - print("softmax delog") - let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false) - let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false) - } + print(para.output) + let padToFourDim = para.output.padToFourDim + let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) + print(outputArray.strideArray()) + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..4495902a46426e2a866ba81a2aa761951605f940 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift @@ -0,0 +1,81 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class SplitParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope) + output = Texture

.init(device: input.metalTexture!.device, inDim: input.dim) + axis = try SplitParam.getAttr(key: "axis", attrs: opDesc.attrs) + sections = try SplitParam.getAttr(key: "sections", attrs: opDesc.attrs) + if axis < 0 { + axis = input.tensorDim.cout() + axis + } + guard let outlist = opDesc.outputs["Out"] else { + fatalError() + } + for out in outlist { + guard let variant = inScope[out], let v = variant as? Texture

else { + fatalError() + } + outputList.append(v) + sections.append(Int32(v.tensorDim.dims[axis])) + } + } catch let error { + throw error + } + } + + var axis: Int + let input: Texture

+ var output: Texture

+ var outputList: [Texture

] = [] + var sections: [Int32] = [] +} + +class SplitOp: Operator, SplitParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = SplitOp

+ + func inferShape() { + // para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + let device = para.input.metalTexture!.device + for out in para.outputList { + let arr: [Float32] = device.texture2tensor(texture: out.metalTexture, dim: out.tensorDim.dims, transpose: out.transpose) + print(arr.strideArray()) + } + } + +} + + + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..8b695ec76fcd46b46f503e21e70f8aac52cee717 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift @@ -0,0 +1,58 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class TransposeParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope) + output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope) + axis = try TransposeParam.getAttr(key: "axis", attrs: opDesc.attrs) + } catch let error { + throw error + } + } + let input: Texture

+ var output: Texture

+ let axis: [Int32] +} + +class TransposeOp: Operator, TransposeParam

>, Runable, Creator, InferShaperable{ + + typealias OpType = TransposeOp

+ + func inferShape() { + //para.output.dim = para.input.dim + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + print(" \(type) output: ") + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) + } +} + + + diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift new file mode 100644 index 0000000000000000000000000000000000000000..1d5ca03ecb9c0af1b83412ad44c343267f35a64b --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift @@ -0,0 +1,209 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +import Metal +import MetalKit +import Foundation + +@objc public enum Platform: Int{ + case CPU, GPU +} + +class ScaleKernel: CusomKernel { + init(device: MTLDevice, shape: Shape) { + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false) + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false) + } else { + fatalError(" unsupport ") + } + } + +} + +public class Runner: NSObject { + var program: Program? + var executor: Executor? + var queue: MTLCommandQueue? + var textureLoader: MTKTextureLoader? + public let net: Net + let device: MTLDevice? + let platform: Platform + var cpuPaddleMobile: PaddleMobileCPU? + let numel: Int + let meansNumber: [NSNumber] + + // dims num nchw + let dimsNum: [NSNumber] + /** + * inNet: 需要运行的网络 + * commandQueue: GPU 是需要传入 + * inPlatform: 需要使用的平台, GPU or CPU + */ + @objc public init(inNet: Net, commandQueue: MTLCommandQueue?, inPlatform: Platform) { + net = inNet + queue = commandQueue + device = queue?.device + platform = inPlatform + if let inDevice = device { + textureLoader = MTKTextureLoader.init(device: inDevice) + } + if platform == .CPU { + cpuPaddleMobile = PaddleMobileCPU.init() + } + numel = net.dim.n * net.dim.c * net.dim.h * net.dim.w + meansNumber = net.means.map { NSNumber.init(value: $0) } + dimsNum = [NSNumber.init(value: net.dim.n), + NSNumber.init(value: net.dim.c), + NSNumber.init(value: net.dim.h), + NSNumber.init(value: net.dim.w)] + } + + /** + * load 模型, 返回 true 可进行预测 + */ + @objc public func load() -> Bool { + if platform == .GPU { + guard let inDevice = device, let inQueue = queue else { + print(" paddle mobile gpu load error, need MTLCommandQueue") + return false + } + let loader = Loader.init() + do { + program = try loader.load(device: inDevice, paramPointer: net.paramPointer!, paramSize: 
net.paramSize,modePointer:net.modelPointer!,modelSize:net.modelSize) +// program = try loader.load(device: inDevice, modelPath: net.modelPath, paraPath: net.paramPath) + net.updateProgram(program: program!) + + executor = try Executor.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!) + } catch let error { + print(error) + return false + } + } else { + return cpuPaddleMobile?.load(net.modelPath, andWeightsPath: net.paramPath) ?? false + } + return true + } + + @objc public func predict(inputPointer: UnsafeMutablePointer, completion: @escaping ( _ success: Bool, _ result: PaddleMobileCPUResult?) -> Void) { + + guard let res = cpuPaddleMobile?.predictInput(inputPointer, dim: dimsNum) else { + completion(false, nil) + return + } + completion(true, res) + } + + /** + * GPU 版本 predict + * texture: 需要预测的 texture 需要做过预处理 + * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组 + */ + @objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) 
-> Void) { + do { + try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in + guard let SSelf = self else { + fatalError( " self nil " ) + } + let result = SSelf.net.fetchResult(paddleMobileRes: res) + completion(true, result) + }, preProcessKernle: self.net.preprocessKernel, except: self.net.except) + } catch let error { + print(error) + completion(false, nil) + return + } + } + + /** + * CPU GPU 通用版本 predict + * cgImage: 需要预测的图片 + * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组 + */ +// @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) { +// if platform == .GPU { +// getTexture(image: cgImage) { [weak self] (texture) in +// guard let SSelf = self else { +// fatalError( "" ) +// } +// SSelf.predict(texture: texture, completion: completion) +// } +// } else if platform == .CPU { +// let input = preproccess(image: cgImage) +// predict(inputPointer: input, completion: completion) +// input.deinitialize(count: numel) +// input.deallocate() +// } +// } + + /* + * 清理内存, 调用此函数后, 不能再使用, 需重新 load + */ + @objc public func clear() { + if platform == .GPU { + executor?.clear() + executor = nil + program = nil + } else if platform == .CPU { + cpuPaddleMobile?.clear() + } + } + + @objc public func preproccess(image: CGImage) -> UnsafeMutablePointer { + let output = UnsafeMutablePointer.allocate(capacity: numel) + let means = net.means.map { NSNumber.init(value: $0) } + let dims = [NSNumber.init(value: net.dim.n), + NSNumber.init(value: net.dim.c), + NSNumber.init(value: net.dim.h), + NSNumber.init(value: net.dim.w)] + cpuPaddleMobile?.preprocess(image, output: output, means: means, scale: net.scale, dim: dims) + return output + } + + /* + * 获取 texture, 对 texture 进行预处理, GPU 预测时使用 + */ + @objc public func getTexture(image: CGImage, getTexture: @escaping 
(MTLTexture) -> Void) { + let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error" + scaleTexture(input: texture!, complete: getTexture) + } + + public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) { + + guard let inQueue = queue, let inDevice = device else { + fatalError( " queue or devcie nil " ) + } + + guard let buffer = inQueue.makeCommandBuffer() else { + fatalError( " make buffer error" ) + } + + let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3)) + + do { + try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer) + } catch let error { + print(error) + fatalError() + } + + buffer.addCompletedHandler { (buffer) in + complete(scaleKernel.outputTexture) + } + buffer.commit() + } +} + + diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.h b/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.h new file mode 100644 index 0000000000000000000000000000000000000000..00149053dfe6891f07f816feef524db35474a18b --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.h @@ -0,0 +1,107 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#import +#import + +typedef enum : NSUInteger { + MobileNetType, + MobileNetSSDType, + GenetType, +} NetType; + +@interface PaddleMobileGPUResult: NSObject + +@property (assign, nonatomic) float *output; + +@property (assign, nonatomic) int outputSize; + +-(void)releaseOutput; + +@end + +@interface ModelConfig: NSObject + +/* + * 预处理需要用到的值 (三个) + */ +@property (strong, nonatomic) NSArray *means; +/* + * 预处理需要用到的 scale 值 + */ +@property (assign, nonatomic) float scale; + +/* + * 输出维度信息 [n c h w] + */ +@property (strong, nonatomic) NSArray *dims; + + +/* + * 模型参数内存地址 + */ +@property (assign, nonatomic) void *paramPointer; + +/* + * 模型参数占用内存大小 (kb) + */ +@property (assign, nonatomic) int paramSize; + +/* + * 模型内存地址 + */ +@property (assign, nonatomic) void *modelPointer; + +/* + * 模型占用内存大小 (kb) + */ +@property (assign, nonatomic) int modelSize; + +@end + +@interface PaddleMobileGPU: NSObject + +/* + * 初始化 + */ +-(instancetype)initWithCommandQueue:(id)queue net:(NetType)netType modelConfig:(ModelConfig *)config; + +/* + * paramPointer 模型参数内存地址 + * paramSize 模型参数占用内存大小 (kb) + * modelPointer 模型内存地址 + * modelSize 模型占用内存大小 (kb) + */ +-(BOOL)load; + +/* + * texture: 需要进行预测的图像转换的 texture + * completion: 预测完成回调 + */ +-(void)predict:(id)texture withCompletion:(void (^)(BOOL, NSArray *))completion; + +/* + * texture: 需要进行预测的图像转换的 texture + * completion: 预测完成回调 + */ +-(void)predict:(id)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion; + +/* + * 清理内存 + */ +-(void)clear; + +@end + + diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.m b/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.m new file mode 100644 index 0000000000000000000000000000000000000000..4e56bf2f98db9cda0d36587bef576e90b3ee6553 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/PaddleMobileGPU.m @@ -0,0 +1,95 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#import +#import "PaddleMobileGPU.h" +#import "paddle_mobile.h" +#import + +@implementation ModelConfig +@end + +@interface PaddleMobileGPUResult () + +@property (strong, nonatomic) ResultHolder *resultHolder; + +- (void)setOutputResult:(ResultHolder *)resultHolder; + +@end + +@implementation PaddleMobileGPUResult +- (void)setOutputResult:(ResultHolder *)resultHolder { + self.resultHolder = resultHolder; + self.output = resultHolder.result; + self.outputSize = resultHolder.capacity; +} + +-(void)releaseOutput { + [self.resultHolder releasePointer]; +} +@end + +@interface PaddleMobileGPU () +{ + Runner *runner; +} +@end + +@implementation PaddleMobileGPU + +-(instancetype)initWithCommandQueue:(id)queue net:(NetType)netType modelConfig:(ModelConfig *)config { + self = [super init]; + if (self) { + Net *net = nil; + if (netType == GenetType) { + net = [[Genet alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize]; + } else if (netType == MobileNetSSDType) { + net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize]; + } else if (netType == MobileNetType) { + + } + runner = [[Runner alloc] initInNet:net commandQueue:queue inPlatform:PlatformGPU]; + } + return self; +} + +-(BOOL)load { + return [runner load]; +} + 
+-(void)predict:(id)texture withCompletion:(void (^)(BOOL, NSArray *))completion { + [runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) { + NSMutableArray *resultArray = [NSMutableArray arrayWithCapacity:result.capacity]; + for (int i = 0; i < result.capacity; ++i) { + [resultArray addObject:[NSNumber numberWithFloat:result.result[i]]]; + } + completion(success, resultArray); + [result releasePointer]; + + }]; +} + +-(void)predict:(id)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion { + [runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) { + PaddleMobileGPUResult *gpuResult = [[PaddleMobileGPUResult alloc] init]; + [gpuResult setOutputResult:result]; + completion(success, gpuResult); + }]; +} + +-(void)clear { + [runner clear]; +} + +@end diff --git a/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift index 8e1915a4975d5e444c2a5c0d0ee9e19d3cbe7577..98dd7ff39a71cadfe6cc33f3d468448ac5155242 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift @@ -14,7 +14,7 @@ import Foundation -struct BlockDesc { +class BlockDesc { let index: Int let parentIndex: Int let vars: [VarDesc] @@ -48,8 +48,10 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { var description: String { var str = "" - for op in ops { - str += op.description + for i in 0.. Bool) -> [String : [String]] in - var map: [String : [String]] = [:] - for opDescVar in vars { - if (canAdd(opDescVar.parameter)) { - map[opDescVar.parameter] = opDescVar.arguments - } - } - return map - } - - inputs = creator(protoOpDesc.inputs) { - opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false - } - - paraInputs = creator(protoOpDesc.inputs) { - !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? 
false) - } - - outputs = creator(protoOpDesc.outputs) { - opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false - } - - unusedOutputs = creator(protoOpDesc.outputs) { - !(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false) - } - - for attr in protoOpDesc.attrs { - if (attr.type != .block) { - attrs[attr.name] = attrWithProtoDesc(attrDesc: attr) - } +class OpDesc { + let inputs: [String : [String]] + var paraInputs: [String : [String]] + var outputs: [String : [String]] + let unusedOutputs: [String : [String]] + var attrs: [String : Attr] = [:] + var type: String + init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) { + type = protoOpDesc.type + let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in + var map: [String : [String]] = [:] + for opDescVar in vars { + if (canAdd(opDescVar.parameter)) { + map[opDescVar.parameter] = opDescVar.arguments } + } + return map } -} - -extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible { - var description: String { - var str = "" - str += "op type: \(type): \n" - str += " op inputs: \n" - str += " \(inputs) \n" - str += " op para inputs: \n" - str += " \(paraInputs) \n" - str += " op para outputs: \n" - str += " \(outputs) \n" - str += " op attrs: \n" - str += " \(attrs) \n" - - return str + + inputs = creator(protoOpDesc.inputs) { + opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false } - var debugDescription: String { - return description + paraInputs = creator(protoOpDesc.inputs) { + !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false) } + outputs = creator(protoOpDesc.outputs) { + opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false + } + + unusedOutputs = creator(protoOpDesc.outputs) { + !(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? 
false) + } + + for attr in protoOpDesc.attrs { + if (attr.type != .block) { + attrs[attr.name] = attrWithProtoDesc(attrDesc: attr) + } + } + } +} + +extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible { + var description: String { + var str = "" + str += "op type: \(type): \n" + str += " op inputs: \n" + str += " \(inputs) \n" + str += " op para inputs: \n" + str += " \(paraInputs) \n" + str += " op para outputs: \n" + str += " \(outputs) \n" + str += " op attrs: \n" + str += " \(attrs) \n" + return str + } + + var debugDescription: String { + return description + } + + } diff --git a/metal/paddle-mobile/paddle-mobile/Program/Program.swift b/metal/paddle-mobile/paddle-mobile/Program/Program.swift index 1481677b198f802cd5f29a967513b2df2107bc47..464705d6db2b87945029de1bfcebddb1bfb4d092 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/Program.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/Program.swift @@ -14,7 +14,7 @@ import Foundation -public struct Program { +public class Program { let paramPath: String let programDesc: ProgramDesc let scope: Scope @@ -23,4 +23,9 @@ public struct Program { paramPath = inParamPath scope = inScope } + init(inProgramDesc: ProgramDesc, inScope: Scope) { + programDesc = inProgramDesc + scope = inScope + paramPath = "" + } } diff --git a/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift index ef094a8a20790b4e0cf47eaea04bb7d4f7a2d046..ad472e5a7d1fe9db248e47f4417d7c61fb01eaa9 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift @@ -14,7 +14,7 @@ import Foundation -public struct ProgramDesc { +public class ProgramDesc { var blocks: [BlockDesc] = [] init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) { for block in protoProgram.blocks { diff --git a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift 
b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift index d819cdad533e444c327e95baff7bf87e902d6bff..87aced32c0c2cd576f023eeb5a3daad15daf1ce8 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift @@ -15,204 +15,285 @@ import Foundation precedencegroup ChainNode { - associativity: left - higherThan: MultiplicationPrecedence + associativity: left + higherThan: MultiplicationPrecedence } infix operator --> : ChainNode class Node { - var inputs: [Node] = [] - var outputs: [Node] = [] - var type: String - var opDesc: OpDesc? - init(inOpDesc: OpDesc) { - type = inOpDesc.type - opDesc = inOpDesc + var inputs: [Node] = [] + var outputs: [Node] = [] + var type: String + var opDesc: OpDesc? + init(inOpDesc: OpDesc) { + type = inOpDesc.type + opDesc = inOpDesc + } + + init(inType: String) { + type = inType + } + + subscript(index: Int) -> [Node] { + var nodes: [Node] = [] + getNodesWithLocation(index: index, nowIndex: 0, nodes: &nodes) + return nodes + } + + func getNodesWithLocation(index: Int, nowIndex: Int, nodes: inout [Node]) { + if index == nowIndex { + nodes.append(self) } - init(inType: String) { - type = inType + for output in outputs { + output.getNodesWithLocation(index: index, nowIndex: nowIndex + 1, nodes: &nodes) + } + } + + static func -->(lNode: Node, rNode: Node) -> Node { + lNode.outputs.append(rNode) + rNode.inputs.append(lNode) + return rNode + } + + func depth(begin: UInt = 1) -> UInt { + var beginMax: UInt = 1 + for output in outputs { + let subDepth = output.depth(begin: begin + 1) + beginMax = max(begin, subDepth) + } + beginMax = max(begin, beginMax) + return beginMax + } + + func to(depth: UInt) -> Node { + let beginNode = Node.init(inType: type) + beginNode.opDesc = opDesc + to(depth: depth - 1, withNode: beginNode) + return beginNode + } + + func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) { + let fusionNode = 
fusion.fusionNode() + let change = fusion.change() + let inOutputs = outputs + outputs.removeAll() + opDesc?.outputs.removeAll() + for i in 0..(lNode: Node, rNode: Node) -> Node { - lNode.outputs.append(rNode) - rNode.inputs.append(lNode) - return rNode + for attr in inOpdesc.attrs { + beginNode.opDesc?.attrs[attr.key] = attr.value + // print(beginNode.opDesc?.attrs) } - func depth(begin: UInt = 1) -> UInt { - var beginMax: UInt = 1 - for output in outputs { - let subDepth = output.depth(begin: begin + 1) - beginMax = max(begin, subDepth) + for paraInput in inOpdesc.paraInputs { + if let inChanges = change[type] { + for keyChange in inChanges { + if keyChange.from == paraInput.key { + beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value + } else { + beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value + } } - beginMax = max(begin, beginMax) - return beginMax + } else { + beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value + } } - func to(depth: UInt) -> Node { - let beginNode = Node.init(inType: type) - to(depth: depth - 1, withNode: beginNode) - return beginNode + if matchNode.outputs.count == 0 { + beginNode.outputs.append(contentsOf: outputs) + beginNode.opDesc?.outputs = inOpdesc.outputs + } + removedNodes.append(self) - func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) { - let fusionNode = fusion.fusionNode() - let change = fusion.change() - let inOutputs = outputs - outputs.removeAll() - opDesc?.outputs.removeAll() - for i in 0.. 
[String : Node]{ + var map: [String : Node] = [:] + relationship(map: &map) + return map + } + + private func relationship(map: inout [String : Node]) { + guard let inOpDesc = opDesc else { + return } + for output in inOpDesc.outputs { + for outputKey in output.value { + map[outputKey] = self + } + } + for output in outputs { + output.relationship(map: &map) + } + } + } extension Node: Equatable { - static func == (lhs: Node, rhs: Node) -> Bool { - if lhs.outputs.count != rhs.outputs.count { - return false - } - - if lhs.type != rhs.type { - return false - } - - for i in 0.. Bool { + if lhs.outputs.count != rhs.outputs.count { + return false } + if lhs.type != rhs.type { + return false + } + + for i in 0.. { - let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp

.self, ConvAddOp

.self] - func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { - - guard originProgramDesc.blocks.count == 1 else { - fatalError(" not support yet") + // register fusion + let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp

.self, +// ConvAddAddPreluOp

.self, + ConvAddPreluOp

.self, + ConvAddOp

.self, + ConvBNReluOp

.self, + DwConvBNReluOp

.self, + ElementwiseAddPreluOp

.self + ] + + func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { + + guard originProgramDesc.blocks.count == 1 else { + fatalError(" not support yet") + } + + var mapForNodeChain: [String : Node] = [:] + var nodes: [Node] = [] + var typeMapNodes: [String : [(node: Node, output: [String : Node])]] = [:] + let block = originProgramDesc.blocks[0] + for opDesc in block.ops { + guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else { + fatalError() + } + + let node = Node.init(inOpDesc: opDesc) + for inputKey in opInputKeys { + if let inputs = opDesc.inputs[inputKey] { + for input in inputs { + if let inputNode = mapForNodeChain[input] { + _ = inputNode --> node + } + } } - - var mapForNodeChain: [String : Node] = [:] - var nodes: [Node] = [] - var typeMapNodes: [String : [Node]] = [:] - let block = originProgramDesc.blocks[0] - for opDesc in block.ops { - guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else { - fatalError() - } - - let node = Node.init(inOpDesc: opDesc) - for inputKey in opInputKeys { - if let inputs = opDesc.inputs[inputKey] { - for input in inputs { - if let inputNode = mapForNodeChain[input] { - _ = inputNode --> node - } - } + } + + for outputKey in outputKeys { + if let outputs = opDesc.outputs[outputKey] { + for output in outputs { + mapForNodeChain[output] = node + } + } + } + + nodes.append(node) + + if var inNodes = typeMapNodes[opDesc.type] { + inNodes.append((node, mapForNodeChain)) + typeMapNodes[opDesc.type] = inNodes + } else { + typeMapNodes[opDesc.type] = [(node, mapForNodeChain)] + } + } + + for fusion in fusionOps { + let fusionNode = fusion.fusionNode() + let depth = fusionNode.depth() + if let toMatchNodes = typeMapNodes[fusionNode.type] { + for node in toMatchNodes { + + let toNode = node.node.to(depth: depth) + if toNode == fusionNode { // match + var canFolder = true + let relationshipMap = toNode.relationship() + + 
for toCheck in fusion.needCheck() { + // let nodes = toCheck + let checkNodes = toNode[toCheck.0] + + for checkNode in checkNodes { + let inputToChecks = checkNode.opDesc?.inputs[toCheck.1] ?? [] + for inputToCheck in inputToChecks { + if node.output[inputToCheck] == nil { + if relationshipMap[inputToCheck] == nil { + canFolder = false } + } } - for outputKey in outputKeys { - if let outputs = opDesc.outputs[outputKey] { - for output in outputs { - mapForNodeChain[output] = node - } + let paramInputToChecks = checkNode.opDesc?.paraInputs[toCheck.1] ?? [] + for paramInputToCheck in paramInputToChecks { + if node.output[paramInputToCheck] == nil { + if relationshipMap[paramInputToCheck] == nil { + canFolder = false } + } } - - nodes.append(node) - - if var inNodes = typeMapNodes[opDesc.type] { - inNodes.append(node) - typeMapNodes[opDesc.type] = inNodes - } else { - typeMapNodes[opDesc.type] = [node] - } + } } - for fusion in fusionOps { - let fusionNode = fusion.fusionNode() - let depth = fusionNode.depth() - if let toMatchNodes = typeMapNodes[fusionNode.type] { - for node in toMatchNodes { - let toNode = node.to(depth: depth) - if toNode == fusionNode { // match - var removeNodes: [Node] = [] - node.folderWith(fusion: fusion, removedNodes: &removeNodes) - for removeNode in removeNodes { - nodes.remove(element: removeNode) - } - } - } - } + if !canFolder { + continue } - - var ops: [OpDesc] = [] - for node in nodes { - ops.append(node.opDesc!) + + var removeNodes: [Node] = [] + node.node.folderWith(fusion: fusion, removedNodes: &removeNodes) + for removeNode in removeNodes { + nodes.remove(element: removeNode) + } + } } - - var newProgramDesc = ProgramDesc.init() - let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) - newProgramDesc.blocks.append(newBlock) - return newProgramDesc + } } + + var ops: [OpDesc] = [] + for node in nodes { + ops.append(node.opDesc!) 
+ } + + var newProgramDesc = ProgramDesc.init() + let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) + newProgramDesc.blocks.append(newBlock) + return newProgramDesc + } } diff --git a/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift index e564821ab6a68fc96f00aeb10f3b2fba26d9600e..1a72f5ef717063136c4708c881befd789a57219c 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift @@ -14,18 +14,18 @@ import Foundation -struct TensorDesc { +class TensorDesc { let dims: [Int] let dataType: VarTypeType - let dataLayout: DataLayout = .NCHW + let dataLayout: DataLayout = DataLayout.NCHW() var NCHWDim: [Int] { get { if dims.count != 4 { return dims } - if dataLayout == .NCHW { + if dataLayout == DataLayout.NCHW() { return dims - } else if dataLayout == .NHWC{ + } else if dataLayout == DataLayout.NHWC() { var resultDims = dims resultDims.swapAt(1, 3) return resultDims @@ -40,9 +40,9 @@ struct TensorDesc { if dims.count != 4 { return dims } - if dataLayout == .NHWC { + if dataLayout == DataLayout.NHWC() { return dims - } else if dataLayout == .NCHW{ + } else if dataLayout == DataLayout.NCHW() { var resultDims = dims resultDims.swapAt(1, 3) return resultDims @@ -53,7 +53,7 @@ struct TensorDesc { } init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) { - dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : 1 } + dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) } dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? 
.ErrorType } diff --git a/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift index 58411828c0c94316da089fc1e2442c87bd154594..f29169598f69ec568bd9d08af8fa4738fe8f5eea 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift @@ -56,7 +56,7 @@ enum VarTypeType: Int { } } -struct VarDesc { +class VarDesc { let name: String let persistable: Bool let type: VarTypeType diff --git a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift index 672484cd9d055bbe65a61d41017199dd79d6cdb2..7e4a05a8dcfc17be10f183de36575342383bb560 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift @@ -31,15 +31,14 @@ public struct Dim { return dims.reduce(1) { $0 * $1 } } - static func ==(left: Dim, right: Dim) -> Bool { + public static func ==(left: Dim, right: Dim) -> Bool { return left.dims == right.dims; } - subscript(index: Int) -> Int { + public subscript(index: Int) -> Int { return dims[index]; } - private(set) var dims: [Int] private init(){ fatalError() diff --git a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift new file mode 100644 index 0000000000000000000000000000000000000000..bdaf8d0973ad3fa6c70e04ad84fd1b14bcb8b39a --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift @@ -0,0 +1,201 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + + +let testTo = 81 + +var isTest = false + +let computePrecision: ComputePrecision = .Float16 + +public class GPUResultHolder { + public let dim: [Int] + public let capacity: Int + public var resultPointer: UnsafeMutablePointer? + public var intermediateResults: [String : [Variant]]? + public let elapsedTime: Double + public init(inDim: [Int], inPointer: UnsafeMutablePointer?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) { + dim = inDim + capacity = inCapacity + + if let inInPointer = inPointer { + resultPointer = UnsafeMutablePointer.allocate(capacity: inCapacity) + resultPointer?.initialize(from: inInPointer, count: inCapacity) + } + + elapsedTime = inElapsedTime + intermediateResults = inIntermediateResults + } + +} + +extension GPUResultHolder: CustomDebugStringConvertible, CustomStringConvertible { + public var debugDescription: String { +// var str = "" +// str += "Dim: \(dim) \n value:[ " +// if resultArr.count < 20 { +// for d in resultArr { +// str += " \(d) " +// } +// } else { +// for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) { +// str += " \(resultArr[d]) " +// } +// } +// str += " ]" +// return str + fatalError() + } + + public var description: String { + return debugDescription + } +} + +public class Executor { + var ops: [Runable & InferShaperable] = [] + let program: Program + let device: MTLDevice + let inflightSemaphore: DispatchSemaphore + let queue: MTLCommandQueue + public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws 
{ + self.inflightSemaphore = DispatchSemaphore(value: 3) + program = inProgram + device = inDevice + queue = inQueue +// print("before for ") +//print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"]) + + + for block in inProgram.programDesc.blocks { + //block.ops.count + for i in 0...shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope) + ops.append(op) + } catch let error { + throw error + } + } + } + } + + public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws { + guard let buffer = queue.makeCommandBuffer() else { + throw PaddleMobileError.predictError(message: "CommandBuffer is nil") + } + inflightSemaphore.wait() + + let resInput: MTLTexture + if let inPre = preProcessKernle { + do { + try inPre.compute(inputTexuture: input, commandBuffer: buffer) + resInput = inPre.outputTexture + } catch let error { + throw error + } + } else { + resInput = input + } + + let beforeDate = Date.init() + let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: dim)) + program.scope.setInput(input: inputTexture) + //(ops.count - except) + for i in 0..<(ops.count - except) { + let op = ops[i] + do { + try op.run(device: device, buffer: buffer) + } catch let error { + throw error + } + } + + var outputTextures: [String : [Variant]]? 
+ if except > 0 { + ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer) + outputTextures = ops[ops.count - except].inputVariant() + } + + buffer.addCompletedHandler { [weak self] (commandbuffer) in +// let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) +// print(inputArr.strideArray()) +// +//// print(dim) +// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr) +// print(" write done ") + +// print("write to library done") +// return +// print(inputArr) +// +// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray() +// print(stridableInput) +// +// let _: Flo? = input.logDesc(header: "input: ", stridable: true) +// for i in 0.. 0 { + resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures) + } else { + let outputVar: Variant = SSelf.program.scope.output()! + let output: FetchHolder = outputVar as! FetchHolder +// let beforeToTensorDate = Date.init() + + resultHolder = GPUResultHolder.init(inDim: output.dim, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate)) + +// let timeToTensor = Date.init().timeIntervalSince(beforeToTensorDate) +// print(timeToTensor) + } + + completionHandle(resultHolder) + SSelf.inflightSemaphore.signal() + } + buffer.commit() + } + + public func clear() { + program.scope.clear() + } + +} diff --git a/metal/paddle-mobile/paddle-mobile/framework/Loader.swift b/metal/paddle-mobile/paddle-mobile/framework/Loader.swift new file mode 100644 index 0000000000000000000000000000000000000000..ee640ddf1163bb1f41da49fe9089964321792d9f --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/framework/Loader.swift @@ -0,0 +1,259 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation +import SwiftProtobuf + +public class Loader { + class ParaLoader { + let file: UnsafeMutablePointer + let fileSize: Int + var nowIndex: Int + init(paramPath: String) throws { + guard let tmpFile = fopen(paramPath, "rb") else { + throw PaddleMobileError.loaderError(message: "open param file error" + paramPath) + } + file = tmpFile + fseek(file, 0, SEEK_END) + fileSize = ftell(file) + guard fileSize > 0 else { + throw PaddleMobileError.loaderError(message: "param file size is too small") + } + rewind(file) + nowIndex = 0 + } + + func read(tensor: Tensor

) throws { + guard nowIndex <= fileSize else { + throw PaddleMobileError.loaderError(message: "out of the file range") + } + + func pointerReader(type: T.Type) -> T { + let ptr = UnsafeMutablePointer.allocate(capacity: MemoryLayout.size) + fread(ptr, 1, MemoryLayout.size, file) + nowIndex += MemoryLayout.size + let pointee = ptr.pointee + ptr.deinitialize(count: MemoryLayout.size) + ptr.deallocate() + return pointee + } + + let _ = pointerReader(type: UInt32.self) + let lodLevel = pointerReader(type: UInt64.self) + for _ in 0...size)){ + _ = pointerReader(type: size_t.self) + } + } + + let _ = pointerReader(type: UInt32.self) + + let tensorDescSize = pointerReader(type: Int32.self) + + fseek(file, Int(tensorDescSize), SEEK_CUR) + nowIndex += Int(tensorDescSize) + + /* + 这里没有根据 Data Type 去判断, 而是从外部泛型直接指定了精度 + */ + + //现在模型传入模型为 Float 类型, 这块应该根据模型来 + // let tmpCapacity = MemoryLayout.size * tensor.numel() + // let tmpPointer = UnsafeMutablePointer.allocate(capacity: tmpCapacity); + let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) + + guard bytesRead == tensor.data.size else { + throw PaddleMobileError.loaderError(message: "param read size error") + } + + // TODO: use script to convert + // let bytesRead = fread(tmpPointer, 1, tmpCapacity, file) + // for i in 0..) 
throws { + guard nowIndex <= paramSize else { + throw PaddleMobileError.loaderError(message: "out of the file range") + } + var readerIndex: Int = 0 + func pointerReader(type: T.Type) -> T { + let ptr = UnsafeMutablePointer.allocate(capacity: MemoryLayout.size) + memcpy(ptr, paramPointer.advanced(by: Int(readerIndex)), MemoryLayout.size) + nowIndex += MemoryLayout.size + readerIndex += MemoryLayout.size + let pointee = ptr.pointee + ptr.deinitialize(count: MemoryLayout.size) + ptr.deallocate() + + return pointee + } + let _ = pointerReader(type: UInt32.self) + let lodLevel = pointerReader(type: UInt64.self) + for _ in 0...size)){ + _ = pointerReader(type: size_t.self) + } + } + + let _ = pointerReader(type: UInt32.self) + let tensorDescSize = pointerReader(type: Int32.self) + + paramPointer = paramPointer.advanced(by: Int(readerIndex)) + paramPointer = paramPointer.advanced(by: Int(tensorDescSize)) + nowIndex += Int(tensorDescSize) + + let _ = memcpy(tensor.data.pointer, paramPointer, tensor.data.size) + paramPointer = paramPointer.advanced(by: Int(tensor.data.size)) + nowIndex += tensor.data.size + } + deinit { + } + } + public init(){} + func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program { + do { + let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init( + serializedData: modelData) + + let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram) + let programDesc = ProgramOptimize

.init().optimize(originProgramDesc: originProgramDesc) + print(programDesc) + + guard programDesc.blocks.count > 0 else { + throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0") + } + + // to get feed key and fetch key + let block = programDesc.blocks[0] + guard let firstOp = block.ops.first, let lastOp = block.ops.last else { + throw PaddleMobileError.loaderError(message: "at least two operator") + } + + guard firstOp.type == gFeedType, lastOp.type == gFetchType else { + throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch") + } + + guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else { + throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found") + } + guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else { + throw PaddleMobileError.loaderError(message: "feed key or fetch key not found") + } + + let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey) + + // to load memory + for block in programDesc.blocks { + for varDesc in block.vars { + if (varDesc.type == .LodTensor) { + guard let tensorDesc = varDesc.tensorDesc else { + throw PaddleMobileError.loaderError(message: "get tensor desc failed") + } + + if (varDesc.persistable + && varDesc.type != .FeedMiniBatch + && varDesc.type != .FetchList) { + let dimArr = tensorDesc.dims + + guard dimArr.count > 0 else { + throw PaddleMobileError.loaderError(message: "tensor desc dim size error") + } + + let dim = Dim.init(inDim: dimArr) + let tensor = Tensor

.init(inDim: dim, inLayout: tensorDesc.dataLayout) + do { + if paraLoaderPointer != nil { + try paraLoaderPointer!.read(tensor: tensor) + } + + if paraLoader != nil { + try paraLoader!.read(tensor: tensor) + } + } catch let error { + throw error + } + // tensor.convert(to: DataLayout.NHWC()) + // tensor.initBuffer(device: device) + scope[varDesc.name] = tensor + } else { + let dim = Dim.init(inDim: tensorDesc.dims) + scope[varDesc.name] = Texture

.init(device: device, inDim: dim) + } + } else { + if varDesc.name == fetchKey { +// scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inCapacity: <#Int#>, inElapsedTime: 0.0) + } else if varDesc.name == feedKey { + } + } + } + } + + let program = Program.init(inProgramDesc: programDesc, inScope: scope) + + return program + } catch _ { + throw PaddleMobileError.loaderError(message: "protobuf decoder error") + } + } + public func load(device:MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) throws -> Program { + let modelData = Data.init(bytes:modePointer, count:modelSize) + guard let paraLoader = try? ParaLoaderWithPointer.init(pPointer: paramPointer,pSize: paramSize) else { + throw PaddleMobileError.loaderError(message: "load para error") + } + do { + let program = try loadModelandParam(device,modelData,paraLoader,nil) + return program + } catch let error { + throw error + } + } + + public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{ + guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else { + throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !") + } + guard let paraLoader = try? 
ParaLoader.init(paramPath: paraPath) else { + throw PaddleMobileError.loaderError(message: "load para error") + } + + do { + let program = try loadModelandParam(device,modelData,nil,paraLoader) + return program + } catch let error { + throw error + } + } +} diff --git a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift index 7ffcd97f4418f17cd7085c5d03e8b58b45c623fd..c5ee1414521e7eb92011d4f4b608ad326b005531 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift @@ -12,251 +12,308 @@ See the License for the specific language governing permissions and limitations under the License. */ -import Accelerate import Foundation protocol Tensorial: CustomStringConvertible, CustomDebugStringConvertible{ - var dim: Dim { get set } - func numel() -> Int - var layout: DataLayout { get } + var dim: Dim { get set } + func numel() -> Int + var layout: DataLayout { get } } extension Tensorial { - func numel() -> Int { - return dim.numel() - } + func numel() -> Int { + return dim.numel() + } +} + +public enum ComputePrecision { + case Float32, Float16 } class Tensor: Tensorial { - enum BufferPrecision { - case Float32, Float16 + + var data: Data + var dim: Dim + var buffer: MTLBuffer! + private(set) var layout: DataLayout + + class Data { + init(inSize: Int, inPointer: UnsafeMutablePointer

) { + size = inSize + pointer = inPointer + } + let size: Int + var pointer: UnsafeMutablePointer

+ subscript(index: Int) -> P{ + get { + return pointer[index] + } + set { + pointer[index] = newValue + } + } + func release() { + pointer.deinitialize(count: size) + pointer.deallocate() + } + deinit { + // release() + } + } + + required init(inDim: Dim, inLayout: DataLayout = DataLayout.NCHW()) { + dim = inDim + let size = inDim.numel() * MemoryLayout

.size + let pointer = UnsafeMutablePointer

.allocate(capacity: size) + data = Data.init(inSize: size, inPointer: pointer) + layout = inLayout + } + + func convert(to: DataLayout) { + guard to != layout else { + return } - var data: Data - var dim: Dim - var buffer: MTLBuffer! - private(set) var layout: DataLayout + guard dim.cout() == 4 else { + return + } - class Data { - init(inSize: Int, inPointer: UnsafeMutablePointer

) { - size = inSize - pointer = inPointer - } - let size: Int - var pointer: UnsafeMutablePointer

- subscript(index: Int) -> P{ - get { - return pointer[index] - } - set { - pointer[index] = newValue - } - } - func release() { - pointer.deinitialize(count: size) - pointer.deallocate() - } - deinit { -// release() - } + guard layout == DataLayout.NCHW() && to == DataLayout.NHWC() else { + // other not support + return } - - required init(inDim: Dim, inLayout: DataLayout = .NCHW) { - dim = inDim - let size = inDim.numel() * MemoryLayout

.size - let pointer = UnsafeMutablePointer

.allocate(capacity: size) - data = Data.init(inSize: size, inPointer: pointer) - layout = inLayout + let newPointer = UnsafeMutablePointer

.allocate(capacity: data.size) + + if layout == DataLayout.NCHW() { + NCHW2NHWC(newPtr: newPointer) } - func convert(to: DataLayout) { - guard to != layout else { - return - } - - guard dim.cout() == 4 else { - return - } - - guard layout == .NCHW && to == .NHWC else { - // other not support - return - } - let newPointer = UnsafeMutablePointer

.allocate(capacity: data.size) - - if layout == .NCHW { - NCHW2NHWC(newPtr: newPointer) - } - - data.release() - data.pointer = newPointer - layout = to + data.release() + data.pointer = newPointer + layout = to + } + + + + func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, convertToNHWC: Bool = true, withTranspose: Bool = false) { + if convertToNHWC { +// print(layout) + convert(to: DataLayout.NHWC()) } - func float32ToFloat16(input: UnsafeMutablePointer, output: UnsafeMutableRawPointer, count: Int) { - var float32Buffer = vImage_Buffer(data: input, height: 1, width: UInt(count), rowBytes: count * 4) - var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2) - guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else { - fatalError(" float 32 to float 16 error ! ") + if withTranspose { + let transposePointer = UnsafeMutablePointer

.allocate(capacity: numel()) + let n = dim[0] + let hwc = numel()/n + for j in 0.. else { - fatalError(" not support yet ") - } - - - let precisionSize: Int - switch precision { - case .Float32: - precisionSize = 4 - case .Float16: - precisionSize = 2 - } - - if dim.cout() == 4 { - if layout == .NHWC { - let C = dim[3] - let cSlices = (C + 3) / 4 - let paddedC = cSlices * 4 - let count = paddedC * dim[0] * dim[1] * dim[2] - if C == paddedC { - buffer = device.makeBuffer(length: count * precisionSize) - switch precision { - case .Float32: - buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) - case .Float16: - float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count) - } - } else if C == 1 { - buffer = device.makeBuffer(length: numel() * precisionSize) - switch precision { - case .Float32: - buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout

.stride) - case .Float16: - float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel()) - } - } else { - buffer = device.makeBuffer(length: count * precisionSize) - let convertedPointer = UnsafeMutablePointer.allocate(capacity: count) - var tmpPointer = floatPointer - var dstPtr = convertedPointer - for _ in 0...stride) - case .Float16: - float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count) - } - - convertedPointer.deinitialize(count: count) - convertedPointer.deallocate() - } - } - } else if dim.cout() == 1 { - buffer = device.makeBuffer(length: numel() * precisionSize) - switch precision { - case .Float32: - buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout

.stride) - case .Float16: - float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel()) - } - } else { - fatalError(" not support !") - } - //TODO: release - data.release() + guard let floatPointer = data.pointer as? UnsafeMutablePointer else { + fatalError(" not support yet ") } - var width: Int { - get { - if dim.cout() == 4 { - return dim[1] - } else { - fatalError() - } - } + let precisionSize: Int + switch precision { + case .Float32: + precisionSize = 4 + case .Float16: + precisionSize = 2 } - var height: Int { - get { - if dim.cout() == 4 { - return dim[2] - } else { - fatalError() + if dim.cout() == 4 { + if layout == DataLayout.NHWC() { + let C = dim[3] + let cSlices = (C + 3) / 4 + let paddedC = cSlices * 4 + let count = paddedC * dim[0] * dim[1] * dim[2] + if C == paddedC { + buffer = device.makeBuffer(length: count * precisionSize) + switch precision { + case .Float32: + buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) + case .Float16: + float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count) + } + } else if C == 1 { + buffer = device.makeBuffer(length: numel() * precisionSize) + switch precision { + case .Float32: + buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout

.stride) + case .Float16: + float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel()) + } + } else { + buffer = device.makeBuffer(length: count * precisionSize) + let convertedPointer = UnsafeMutablePointer.allocate(capacity: count) + var tmpPointer = floatPointer + var dstPtr = convertedPointer + for _ in 0...stride) + case .Float16: + float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count) + } + + convertedPointer.deinitialize(count: count) + convertedPointer.deallocate() } - } - - var channel: Int { - get { - if dim.cout() == 4 { - return dim[3] - } else { - fatalError() + } else { + let C = dim[3] + let cSlices = (C + 3) / 4 + let paddedC = cSlices * 4 + let count = paddedC * dim[0] * dim[1] * dim[2] + if C == paddedC { + buffer = device.makeBuffer(length: count * precisionSize) + switch precision { + case .Float32: + buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) + case .Float16: + float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count) + } + } else if C == 1 { + fatalError(" not support ") + } else { + buffer = device.makeBuffer(length: count * precisionSize) + let convertedPointer = UnsafeMutablePointer.allocate(capacity: count) + var tmpPointer = floatPointer + var dstPtr = convertedPointer + for _ in 0...stride) + case .Float16: + float32ToFloat16(input: convertedPointer, output: buffer.contents(), count: count) + } + convertedPointer.deinitialize(count: count) + convertedPointer.deallocate() } + } + } else if dim.cout() == 1 { + let num = ((numel() + 3) / 4) * 4 + buffer = device.makeBuffer(length: num * precisionSize) + switch precision { + case .Float32: + buffer?.contents().copyMemory(from: data.pointer, byteCount: num * MemoryLayout

.stride) + case .Float16: + float32ToFloat16(input: floatPointer, output: buffer.contents(), count: num) + } + } else { + fatalError(" not support !") } - + //TODO: release + data.release() + } + + var width: Int { + get { + if dim.cout() == 4 { + return dim[1] + } else { + fatalError() + } + } + } + + var height: Int { + get { + if dim.cout() == 4 { + return dim[2] + } else { + fatalError() + } + } + } + + var channel: Int { + get { + if dim.cout() == 4 { + return dim[3] + } else { + fatalError() + } + } + } + + + func NCHW2NHWC(newPtr: UnsafeMutablePointer

) { + let N = dim[0] + let C = dim[1] + let H = dim[2] + let W = dim[3] + let HXW = H * W + let CXHXW = C * H * W - func NCHW2NHWC(newPtr: UnsafeMutablePointer

) { - let N = dim[0] - let C = dim[1] - let H = dim[2] - let W = dim[3] - let HXW = H * W - let CXHXW = C * H * W - - var index: Int = 0 - for n in 0...size { - str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" - } - return str + + var debugDescription: String { + var str = "dim: \(dim) \n" + str += "MTLBuffer: \(self.buffer) \n" + for i in 0...size { + str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" } - - func logDataPointer(header: String = "") { - print(header) - var str = "" - str += "data size: \(data.size) \n" - str += "dim: \(dim) \n" - for i in 0..: Tensorial { - var dim: Dim - let textureDesc: MTLTextureDescriptor - var metalTexture: MTLTexture - - init(device: MTLDevice, inDim: Dim, inLayout: DataLayout = .NHWC) { - dim = inDim - layout = inLayout - let tmpTextureDes = MTLTextureDescriptor.init() - if inDim.cout() == 1 { - tmpTextureDes.width = inDim[0] - tmpTextureDes.textureType = .type1D - } else if inDim.cout() == 4 { - tmpTextureDes.height = inDim[1] - tmpTextureDes.width = inDim[2] -// print("n : \(inDim[0])") -// print(inDim[3] * inDim[0]) - tmpTextureDes.depth = 1 - tmpTextureDes.arrayLength = (inDim[3] * inDim[0] + 3)/4 - tmpTextureDes.textureType = .type2DArray - } else if inDim.cout() == 2 { - tmpTextureDes.height = 1 - tmpTextureDes.width = 1 - tmpTextureDes.depth = 1 - tmpTextureDes.arrayLength = (inDim[0] * inDim[1] + 3)/4 - tmpTextureDes.textureType = .type2DArray - } else { - fatalError(" not suuprt ") - } - - if MemoryLayout

.size == 1 { - tmpTextureDes.pixelFormat = .rgba8Unorm - } else if MemoryLayout

.size == 2 { - tmpTextureDes.pixelFormat = .rgba16Float - } else if MemoryLayout

.size == 4 { -// tmpTextureDes.pixelFormat = .r32Float - tmpTextureDes.pixelFormat = .rgba32Float - } -// tmpTextureDes.pixelFormat = .rgba16Float +/* + 4 维 tensor 存储 texture,要考虑 transpose + transpose 之后的维度是 [a, b, c, d],对应的texture_2darray + .width = c + .height = b + .len = a * d + 3 / 4 + +低于 4 维的 tensor,transpose 必须为 [0, 1, 2, 3] 既不考虑 transpose + +// TODO transpose 对于低维 tensor 的扩展原则。。。 +// [a, b] -> [1, 1, a, b] transpose 必须为 [0, 1, x, x] +// [a] -> [1, 1, 1, a] transpose 必须为 [0, 1, 2, 3] +// [a, b, c] -> [1, a, b, c] tranpose 必须为 [0, x, x, x] + +3 维 tensor [a, b, c] 对应的 texture_2darray, +.width = c +.height = b +.len = a + 3 / 4 + + 2 维 tensor [a, b] 对应的 texture_2darray + .width = b + 3 / 4 + .height = a + .len = 1 + + 1 维 tensor [a] 对应的 texture_2darray + .width = a + 3 / 4 + .height = 1 + .len = 1 + */ + - tmpTextureDes.usage = [.shaderRead, .shaderWrite] - tmpTextureDes.storageMode = .shared - textureDesc = tmpTextureDes - metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! " texture nil " +public class Texture: Tensorial { + var dim: Dim + public var tensorDim: Dim + public var padToFourDim: Dim + private var textureDesc: MTLTextureDescriptor! + public var metalTexture: MTLTexture! 
+ var transpose: [Int] = [0, 1, 2, 3] + + func toTensor() -> [Float32] { + guard padToFourDim.cout() == 4 else { + fatalError("- not support -") } + return metalTexture.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) + } + + func realNHWC() -> [Float32] { + guard padToFourDim.cout() == 4 else { + fatalError(" - not support - ") + } + return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) + } + + func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) { + transpose = inTranspose + for i in 0..<(4 - tensorDim.cout()) { + if i != inTranspose[i] { + fatalError() + } + } + let newDim = transpose.map { padToFourDim[$0] } -// required public init(inDim: Dim, inLayout: DataLayout = .NHWC, inTexture: MTLTexture) { -// dim = inDim -// layout = inLayout -// metalTexture = inTexture -// let tmpTextureDes = MTLTextureDescriptor.init() -// -// if inDim.cout() == 1 { -// tmpTextureDes.width = inDim[0] -// tmpTextureDes.textureType = .type1D -// } else if inDim.cout() == 2 { -// tmpTextureDes.height = inDim[0] -// tmpTextureDes.width = inDim[1] -// tmpTextureDes.textureType = .type2D -// } else if inDim.cout() == 3 { -// fatalError(" not support texture dim 3") -// } else if inDim.cout() == 4 { -// tmpTextureDes.height = inDim[1] -// tmpTextureDes.width = inDim[2] -// tmpTextureDes.depth = inDim[3] * inDim[1] -// tmpTextureDes.textureType = .type2DArray -// } -// -// tmpTextureDes.pixelFormat = .r32Float -// tmpTextureDes.storageMode = .shared -// textureDesc = tmpTextureDes -// let device = MTLCreateSystemDefaultDevice() -// metalTexture = device!.makeTexture(descriptor: tmpTextureDes)! 
-// } + let newLayout = transpose.map { layout.layoutWithDim[$0] } -// init() { -// dim = Dim.init(inDim: []) -// layout = .NCHW -// let device = MTLCreateSystemDefaultDevice() -// textureDesc = MTLTextureDescriptor.init() -// metalTexture = device!.makeTexture(descriptor: textureDesc)! -// } + layout = DataLayout.init(newLayout) + dim = Dim.init(inDim: newDim) - private(set) var layout: DataLayout -} - -extension Texture { - public var description: String { - return debugDescription - } + let tmpTextureDes = MTLTextureDescriptor.init() + tmpTextureDes.textureType = .type2DArray + tmpTextureDes.depth = 1 - public var debugDescription: String{ - var str = "" - str += "Dim: \(dim) \n value:[ " - str += "\(metalTexture)" - str += " ]" - return str + switch tensorDim.cout() { + case 4: + tmpTextureDes.width = newDim[2] + tmpTextureDes.height = newDim[1] + tmpTextureDes.arrayLength = ((newDim[0]) * (newDim[3]) + 3) / 4 + case 3: + tmpTextureDes.width = newDim[3] + tmpTextureDes.height = newDim[2] + tmpTextureDes.arrayLength = (newDim[1] + 3) / 4 + case 2, 1: + tmpTextureDes.width = (newDim[3] + 3) / 4 + tmpTextureDes.height = newDim[2] + tmpTextureDes.arrayLength = 1 + default: + fatalError("unreachable") + } + + if computePrecision == .Float16 { + tmpTextureDes.pixelFormat = .rgba16Float + } else if computePrecision == .Float32 { + tmpTextureDes.pixelFormat = .rgba32Float } + tmpTextureDes.usage = [.shaderRead, .shaderWrite] + tmpTextureDes.storageMode = .shared + textureDesc = tmpTextureDes + metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! 
" texture nil " + } + + init(device: MTLDevice, inDim: Dim) { + var fourDim: Dim + if inDim.cout() == 4 { + fourDim = inDim + } else if inDim.cout() < 4 { + var fourDimNum: [Int] = [] + for _ in 0..<(4 - inDim.cout()) { + fourDimNum.append(1) + } + fourDimNum.append(contentsOf: inDim.dims) + fourDim = Dim.init(inDim: fourDimNum) + } else { + fatalError(" not support ") + } + tensorDim = inDim + dim = fourDim + padToFourDim = fourDim + layout = DataLayout.init([(.N, fourDim[0]), (.C, fourDim[1]), (.H, fourDim[2]), (.W, fourDim[3])]) + } + + private(set) var layout: DataLayout +} + +extension Texture { + public var description: String { + return debugDescription + } + + public var debugDescription: String{ + var str = "" + str += "Dim: \(dim) \n value:[ " + str += "\(metalTexture)" + str += " ]" + return str + } + } diff --git a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h index ffa44be38a4c3a1f3109c51b3d15506591f2de2e..50b60e9fe6c973b675a97e16c3c15af2b72e3fc4 100644 --- a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h +++ b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h @@ -14,12 +14,15 @@ #pragma once +#import "PaddleMobileCPU.h" +#import "CPUCompute.h" +#import "PaddleMobileGPU.h" #import //! Project version number for paddle_mobile. -FOUNDATION_EXPORT double paddle_mobileVersionNumber; +//FOUNDATION_EXPORT double paddle_mobileVersionNumber; //! Project version string for paddle_mobile. 
-FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[]; +//FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[]; diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp index 10787b915594a12a826a087e5453b2c2e8c03f9a..f10aee5014d8f377ecc8e1735276aebf6418436f 100644 --- a/src/fpga/api.cpp +++ b/src/fpga/api.cpp @@ -311,6 +311,8 @@ int get_aligned_filter_num(int num) { void format_filter(framework::Tensor *filter_tensor, float max_value, int group_num) { + filter_tensor->scale[0] = float(max_value / 127.0); + filter_tensor->scale[1] = float(127.0 / max_value); auto dims = filter_tensor->dims(); auto num = dims[0], channel = dims[1], height = dims[2], width = dims[3]; auto data_ptr = filter_tensor->data(); diff --git a/src/io/executor.cpp b/src/io/executor.cpp index c8d8f52a427bb1ee2b9fa04c9ef09f8e626f11b0..d733231ef03f74eba2f1f2e989a0bad1cf43f161 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -676,11 +676,11 @@ std::shared_ptr Executor::FetchResult(int id) { to_predict_program_->Block(0); auto &ops = ops_of_block_[*to_predict_block.get()]; - PADDLE_MOBILE_ENFORCE(id < ops.size(), "Index out of range"); - auto last_op = id < 0 ? ops[ops.size() - 1] : ops[id]; - auto output_map = last_op->Outputs(); - std::vector out_keys = last_op->GetOutKeys(); - PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output"); + PADDLE_MOBILE_ENFORCE(id < (int)ops.size(), "Index out of range"); + auto op = id < 0 ? 
ops[ops.size() - 1] : ops[id]; + auto output_map = op->Outputs(); + std::vector out_keys = op->GetOutKeys(); + PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "this op contains no output"); auto *output_tensor = framework::GetVarValue( out_keys[0], output_map, *(program_.scope)); return std::make_shared(framework::Tensor(*output_tensor)); diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index cccd4f52ebdc368e4f68eaf9dc3f25ee3693fdd2..1b36461932798153af60d936dbac91817a4100df 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -50,8 +50,8 @@ class FeedOp : public framework::OperatorBase { void RunImpl() const { auto input = (Tensor *)const_cast(param_.InputX()); - auto input_ptr = input->data(); fpga::format_image(input); + auto input_ptr = input->data(); Tensor *output = param_.Out(); auto output_ptr = output->data(); diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp index 86da2833ed6e1443707054896127e87c0ca297b9..1c48ef021945e6a7b8b53ee946a33b862766deeb 100644 --- a/src/operators/kernel/fpga/concat_kernel.cpp +++ b/src/operators/kernel/fpga/concat_kernel.cpp @@ -47,7 +47,7 @@ bool ConcatKernel::Init(ConcatParam *param) { concatArgs.image_num = (uint32_t)image_num; concatArgs.images_in = images_in; concatArgs.scales_in = scales_in; - concatArgs.image_out = (half *)out->mutable_data(); + concatArgs.image_out = (half *)out->data(); concatArgs.scale_out = out->scale; concatArgs.channel_num = channel_num; concatArgs.height = (uint32_t)height; diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp index 7cfd0c7d76c1a8e73955dbec1971d86ceebde259..ef68cc3c33fdc4c0a8537cbb1dd3a49583c6c8b1 100644 --- a/src/operators/kernel/fpga/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/softmax_kernel.cpp @@ -39,8 +39,8 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { args.image.height = 1; args.image.width = 1; args.image.channels = 
(uint32_t)input->dims()[1]; - args.output.address = float_input->mutable_data(); - + args.output.address = float_input->data(); + args.output.scale_address = float_input->scale; param->SetFloatInput(float_input); param->SetFpgaArgs(args); return true; diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp index 25cad4feaa706899122902dee2a8f0c915e78975..d3d5705e63e3dffad0d4bad5422c27d57dddc350 100644 --- a/test/framework/test_load.cpp +++ b/test/framework/test_load.cpp @@ -21,8 +21,15 @@ int main() { paddle_mobile::Loader loader; // ../../../test/models/googlenet // ../../../test/models/mobilenet +<<<<<<< HEAD + auto program = loader.Load(g_mobilenet_ssd, false, false); + // auto program = loader.Load(g_googlenet_combine + "/model", + // g_googlenet_combine + + // "/params", true); +======= // auto program = loader.Load(g_googlenet, true); // auto program = loader.Load(g_mobilenet_ssd, true); +>>>>>>> e60ab7ae5a43b9cc788813877fbfffc67c87b5f3 auto program = loader.Load(std::string(g_ocr) + "/model", std::string(g_ocr) + "/params", false);