提交 5ccf5092 编写于 作者: H hjchen2

Merge conflicts

...@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.0.0) ...@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.0.0)
option(USE_OPENMP "build with openmp support" ON) option(USE_OPENMP "build with openmp support" ON)
option(USE_EXCEPTION "build with exception" ON) option(USE_EXCEPTION "build with exception" ON)
option(WITH_LOGGING "print logging for debug" ON) option(WITH_LOGGING "print logging for debug" OFF)
option(WITH_SYMBOL "build with all symbols" ON) # turn off if use jni or ios io option(WITH_SYMBOL "build with all symbols" ON) # turn off if use jni or ios io
option(WITH_PROFILE "print op profile for debug" OFF) option(WITH_PROFILE "print op profile for debug" OFF)
option(WITH_TEST "build with unit tests" ON) option(WITH_TEST "build with unit tests" ON)
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */; }; FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */; };
FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5021E0CEBB0075EC91 /* mobilenet_params */; }; FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5021E0CEBB0075EC91 /* mobilenet_params */; };
FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5321E0CEF80075EC91 /* synset.txt */; }; FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5321E0CEF80075EC91 /* synset.txt */; };
FCC15E13221E715400DC3CB2 /* paddle-mobile-metallib.metallib in Resources */ = {isa = PBXBuildFile; fileRef = FCC15E12221E715400DC3CB2 /* paddle-mobile-metallib.metallib */; };
FCD3873821E1C31F0052F3D0 /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; }; FCD3873821E1C31F0052F3D0 /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; };
FCD3873921E1C31F0052F3D0 /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; FCD3873921E1C31F0052F3D0 /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF2870921DFAEC7009A87DA /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2870821DFAEC7009A87DA /* AppDelegate.swift */; }; FCF2870921DFAEC7009A87DA /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2870821DFAEC7009A87DA /* AppDelegate.swift */; };
...@@ -49,6 +50,7 @@ ...@@ -49,6 +50,7 @@
FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; }; FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; };
FCB40E5021E0CEBB0075EC91 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; }; FCB40E5021E0CEBB0075EC91 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; };
FCB40E5321E0CEF80075EC91 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; }; FCB40E5321E0CEF80075EC91 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FCC15E12221E715400DC3CB2 /* paddle-mobile-metallib.metallib */ = {isa = PBXFileReference; lastKnownFileType = "archive.metal-library"; name = "paddle-mobile-metallib.metallib"; path = "../../../../Library/Developer/Xcode/DerivedData/paddle-mobile-hdsimtkoxoondndnjczkbkchcwyh/Build/Products/Release-iphoneos/paddle-mobile-metallib.metallib"; sourceTree = "<group>"; };
FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MobileNetDemo.app; sourceTree = BUILT_PRODUCTS_DIR; }; FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MobileNetDemo.app; sourceTree = BUILT_PRODUCTS_DIR; };
FCF2870821DFAEC7009A87DA /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; }; FCF2870821DFAEC7009A87DA /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
...@@ -127,6 +129,7 @@ ...@@ -127,6 +129,7 @@
FCF286FC21DFAEC7009A87DA = { FCF286FC21DFAEC7009A87DA = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCC15E12221E715400DC3CB2 /* paddle-mobile-metallib.metallib */,
FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */, FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */,
FCF2870721DFAEC7009A87DA /* MobileNetDemo */, FCF2870721DFAEC7009A87DA /* MobileNetDemo */,
FCF2870621DFAEC7009A87DA /* Products */, FCF2870621DFAEC7009A87DA /* Products */,
...@@ -225,6 +228,7 @@ ...@@ -225,6 +228,7 @@
FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */, FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */,
FCB40DE921E0B9410075EC91 /* banana.jpeg in Resources */, FCB40DE921E0B9410075EC91 /* banana.jpeg in Resources */,
FCF2871021DFAEC8009A87DA /* Assets.xcassets in Resources */, FCF2871021DFAEC8009A87DA /* Assets.xcassets in Resources */,
FCC15E13221E715400DC3CB2 /* paddle-mobile-metallib.metallib in Resources */,
FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */, FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */,
FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */, FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */,
FCF2870E21DFAEC7009A87DA /* Main.storyboard in Resources */, FCF2870E21DFAEC7009A87DA /* Main.storyboard in Resources */,
......
...@@ -42,10 +42,11 @@ public class MobileNet: Net{ ...@@ -42,10 +42,11 @@ public class MobileNet: Net{
let labels = PreWords.init(fileName: "synset") let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
let resPointer = res.result let firstRes = res[0]
let resPointer = firstRes.result
var s: [String] = [] var s: [String] = []
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{ (0..<firstRes.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
} }
return s.joined(separator: "\n") return s.joined(separator: "\n")
...@@ -58,6 +59,9 @@ public class MobileNet: Net{ ...@@ -58,6 +59,9 @@ public class MobileNet: Net{
paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
preprocessKernel = MobilenetPreProccess.init(device: device) preprocessKernel = MobilenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 224, 224, 3]) inputDim = Dim.init(inDim: [1, 224, 224, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
useMPS = true
} }
} }
...@@ -27,3 +27,8 @@ target 'MobileNetDemo' do ...@@ -27,3 +27,8 @@ target 'MobileNetDemo' do
pod 'Protobuf', '~> 3.0.0' pod 'Protobuf', '~> 3.0.0'
end end
target 'paddle-mobile-metallib' do
project 'paddle-mobile-metallib/paddle-mobile-metallib.xcodeproj'
end
...@@ -33,8 +33,6 @@ ...@@ -33,8 +33,6 @@
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */ = {isa = PBXBuildFile; fileRef = FC5E03B121DCE8D90016C137 /* mingren_input_data */; }; FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */ = {isa = PBXBuildFile; fileRef = FC5E03B121DCE8D90016C137 /* mingren_input_data */; };
FC704C1921D2375300F98BAB /* super_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1721D2375300F98BAB /* super_params */; }; FC704C1921D2375300F98BAB /* super_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1721D2375300F98BAB /* super_params */; };
FC704C1A21D2375300F98BAB /* super_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1821D2375300F98BAB /* super_model */; }; FC704C1A21D2375300F98BAB /* super_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1821D2375300F98BAB /* super_model */; };
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */; };
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */; };
FC704C2421D237FC00F98BAB /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2021D237FC00F98BAB /* yolo_params */; }; FC704C2421D237FC00F98BAB /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2021D237FC00F98BAB /* yolo_params */; };
FC704C2521D237FC00F98BAB /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2121D237FC00F98BAB /* yolo_model */; }; FC704C2521D237FC00F98BAB /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2121D237FC00F98BAB /* yolo_model */; };
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; }; FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; };
...@@ -44,11 +42,18 @@ ...@@ -44,11 +42,18 @@
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C121D608DF00F2FD90 /* mobilenet_params */; }; FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C121D608DF00F2FD90 /* mobilenet_params */; };
FC9797C721D609FB00F2FD90 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C621D609FB00F2FD90 /* synset.txt */; }; FC9797C721D609FB00F2FD90 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C621D609FB00F2FD90 /* synset.txt */; };
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC9797CE21D6506F00F2FD90 /* mingren.jpg */; }; FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC9797CE21D6506F00F2FD90 /* mingren.jpg */; };
FCAFD84B2231614200496A36 /* yolo_16_param in Resources */ = {isa = PBXBuildFile; fileRef = FCAFD8492231614200496A36 /* yolo_16_param */; };
FCAFD84C2231614200496A36 /* yolo_16_model in Resources */ = {isa = PBXBuildFile; fileRef = FCAFD84A2231614200496A36 /* yolo_16_model */; };
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; }; FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */ = {isa = PBXBuildFile; fileRef = FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */; };
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */ = {isa = PBXBuildFile; fileRef = FCCED60421D7646E00BE8D5F /* test_image_super */; }; FCCED60521D7646E00BE8D5F /* test_image_super in Resources */ = {isa = PBXBuildFile; fileRef = FCCED60421D7646E00BE8D5F /* test_image_super */; };
FCE834AE2232A4AE0057BF43 /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */; };
FCE834AF2232A4AE0057BF43 /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */; };
FCE834B12232B6DC0057BF43 /* vision_synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCE834B02232B6DC0057BF43 /* vision_synset.txt */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; }; FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; }; FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; };
FCFADE34222F63CC0037DCE8 /* test_big.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FCFADE33222F63CB0037DCE8 /* test_big.JPG */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */ /* Begin PBXCopyFilesBuildPhase section */
...@@ -101,8 +106,6 @@ ...@@ -101,8 +106,6 @@
FC5E03B121DCE8D90016C137 /* mingren_input_data */ = {isa = PBXFileReference; lastKnownFileType = file; path = mingren_input_data; sourceTree = "<group>"; }; FC5E03B121DCE8D90016C137 /* mingren_input_data */ = {isa = PBXFileReference; lastKnownFileType = file; path = mingren_input_data; sourceTree = "<group>"; };
FC704C1721D2375300F98BAB /* super_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_params; sourceTree = "<group>"; }; FC704C1721D2375300F98BAB /* super_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_params; sourceTree = "<group>"; };
FC704C1821D2375300F98BAB /* super_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_model; sourceTree = "<group>"; }; FC704C1821D2375300F98BAB /* super_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_model; sourceTree = "<group>"; };
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FC704C2021D237FC00F98BAB /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; }; FC704C2021D237FC00F98BAB /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; };
FC704C2121D237FC00F98BAB /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; }; FC704C2121D237FC00F98BAB /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; };
FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; }; FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; };
...@@ -112,10 +115,17 @@ ...@@ -112,10 +115,17 @@
FC9797C121D608DF00F2FD90 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; }; FC9797C121D608DF00F2FD90 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; };
FC9797C621D609FB00F2FD90 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; }; FC9797C621D609FB00F2FD90 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FC9797CE21D6506F00F2FD90 /* mingren.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = mingren.jpg; sourceTree = "<group>"; }; FC9797CE21D6506F00F2FD90 /* mingren.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = mingren.jpg; sourceTree = "<group>"; };
FCAFD8492231614200496A36 /* yolo_16_param */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_16_param; sourceTree = "<group>"; };
FCAFD84A2231614200496A36 /* yolo_16_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_16_model; sourceTree = "<group>"; };
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; }; FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */ = {isa = PBXFileReference; lastKnownFileType = "archive.metal-library"; name = "paddle-mobile-metallib.metallib"; path = "../../../../Library/Developer/Xcode/DerivedData/paddle-mobile-hdsimtkoxoondndnjczkbkchcwyh/Build/Products/Release-iphoneos/paddle-mobile-metallib.metallib"; sourceTree = "<group>"; };
FCCED60421D7646E00BE8D5F /* test_image_super */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_image_super; sourceTree = "<group>"; }; FCCED60421D7646E00BE8D5F /* test_image_super */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_image_super; sourceTree = "<group>"; };
FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FCE834B02232B6DC0057BF43 /* vision_synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = vision_synset.txt; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; }; FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; };
FCFADE33222F63CB0037DCE8 /* test_big.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = test_big.JPG; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
...@@ -153,6 +163,7 @@ ...@@ -153,6 +163,7 @@
FC039B7520E11C550081E9F8 = { FC039B7520E11C550081E9F8 = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */,
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */, FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */,
FC039B8020E11C550081E9F8 /* paddle-mobile-demo */, FC039B8020E11C550081E9F8 /* paddle-mobile-demo */,
FC039B7F20E11C550081E9F8 /* Products */, FC039B7F20E11C550081E9F8 /* Products */,
...@@ -193,6 +204,7 @@ ...@@ -193,6 +204,7 @@
FC203FA821CBFDBA00B37166 /* images */ = { FC203FA821CBFDBA00B37166 /* images */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCFADE33222F63CB0037DCE8 /* test_big.JPG */,
FC2BFCBF21DF279900C262B2 /* classify-img-output.png */, FC2BFCBF21DF279900C262B2 /* classify-img-output.png */,
FC2BFCBD21DF15D900C262B2 /* 123.jpg */, FC2BFCBD21DF15D900C262B2 /* 123.jpg */,
FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */, FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */,
...@@ -257,21 +269,13 @@ ...@@ -257,21 +269,13 @@
FC704C1B21D237FC00F98BAB /* vision_model */ = { FC704C1B21D237FC00F98BAB /* vision_model */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC704C1C21D237FC00F98BAB /* mobilenet */, FCE834AB2232A4AE0057BF43 /* vision_mobilenet */,
FCAFD8482231614200496A36 /* yolo_16 */,
FC704C1F21D237FC00F98BAB /* yolo */, FC704C1F21D237FC00F98BAB /* yolo */,
); );
path = vision_model; path = vision_model;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC704C1C21D237FC00F98BAB /* mobilenet */ = {
isa = PBXGroup;
children = (
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */,
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */,
);
path = mobilenet;
sourceTree = "<group>";
};
FC704C1F21D237FC00F98BAB /* yolo */ = { FC704C1F21D237FC00F98BAB /* yolo */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
...@@ -316,6 +320,25 @@ ...@@ -316,6 +320,25 @@
path = mobilenet; path = mobilenet;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FCAFD8482231614200496A36 /* yolo_16 */ = {
isa = PBXGroup;
children = (
FCAFD8492231614200496A36 /* yolo_16_param */,
FCAFD84A2231614200496A36 /* yolo_16_model */,
);
path = yolo_16;
sourceTree = "<group>";
};
FCE834AB2232A4AE0057BF43 /* vision_mobilenet */ = {
isa = PBXGroup;
children = (
FCE834B02232B6DC0057BF43 /* vision_synset.txt */,
FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */,
FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */,
);
path = vision_mobilenet;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXNativeTarget section */ /* Begin PBXNativeTarget section */
...@@ -381,20 +404,25 @@ ...@@ -381,20 +404,25 @@
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */, FCCED60521D7646E00BE8D5F /* test_image_super in Resources */,
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */, FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */, FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */,
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */, FCAFD84B2231614200496A36 /* yolo_16_param in Resources */,
FCE834AF2232A4AE0057BF43 /* combined_mobilenet_model in Resources */,
FC704C1921D2375300F98BAB /* super_params in Resources */, FC704C1921D2375300F98BAB /* super_params in Resources */,
FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */, FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */, FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
FC9797C721D609FB00F2FD90 /* synset.txt in Resources */, FC9797C721D609FB00F2FD90 /* synset.txt in Resources */,
FCFADE34222F63CC0037DCE8 /* test_big.JPG in Resources */,
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */, FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */,
FC704C1A21D2375300F98BAB /* super_model in Resources */, FC704C1A21D2375300F98BAB /* super_model in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */, FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
FCE834B12232B6DC0057BF43 /* vision_synset.txt in Resources */,
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */, FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */,
FCAFD84C2231614200496A36 /* yolo_16_model in Resources */,
FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */, FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */,
FC203FB221CBFDBA00B37166 /* test.jpg in Resources */, FC203FB221CBFDBA00B37166 /* test.jpg in Resources */,
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */, FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */,
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */, FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */,
FC704C2421D237FC00F98BAB /* yolo_params in Resources */, FC704C2421D237FC00F98BAB /* yolo_params in Resources */,
FCE834AE2232A4AE0057BF43 /* combined_mobilenet_params in Resources */,
FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */, FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */,
FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */, FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */,
FC704C2521D237FC00F98BAB /* yolo_model in Resources */, FC704C2521D237FC00F98BAB /* yolo_model in Resources */,
......
{
"images" : [
{
"idiom" : "universal",
"filename" : "paddle-mobile.png",
"scale" : "1x"
},
{
"idiom" : "universal",
"scale" : "2x"
},
{
"idiom" : "universal",
"scale" : "3x"
}
],
"info" : {
"version" : 1,
"author" : "xcode"
}
}
\ No newline at end of file
...@@ -155,7 +155,7 @@ ...@@ -155,7 +155,7 @@
<nil key="textColor"/> <nil key="textColor"/>
<nil key="highlightedColor"/> <nil key="highlightedColor"/>
</label> </label>
<imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="paddle-mobile.png" translatesAutoresizingMaskIntoConstraints="NO" id="4ey-Xr-U4e"> <imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="paddle-mobile" translatesAutoresizingMaskIntoConstraints="NO" id="4ey-Xr-U4e">
<rect key="frame" x="90" y="637" width="195" height="30"/> <rect key="frame" x="90" y="637" width="195" height="30"/>
<constraints> <constraints>
<constraint firstAttribute="width" secondItem="4ey-Xr-U4e" secondAttribute="height" multiplier="6.5:1" id="8c5-FF-lB9"/> <constraint firstAttribute="width" secondItem="4ey-Xr-U4e" secondAttribute="height" multiplier="6.5:1" id="8c5-FF-lB9"/>
...@@ -246,10 +246,6 @@ ...@@ -246,10 +246,6 @@
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/> <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/> <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews> <subviews>
<imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" fixedFrame="YES" translatesAutoresizingMaskIntoConstraints="NO" id="2p5-S3-M4T">
<rect key="frame" x="16" y="63" width="240" height="128"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
</imageView>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="37q-nm-0H7"> <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="37q-nm-0H7">
<rect key="frame" x="38" y="610" width="42" height="30"/> <rect key="frame" x="38" y="610" width="42" height="30"/>
<constraints> <constraints>
...@@ -287,7 +283,16 @@ ...@@ -287,7 +283,16 @@
<constraint firstAttribute="height" constant="30" id="eAt-Uc-BxX"/> <constraint firstAttribute="height" constant="30" id="eAt-Uc-BxX"/>
</constraints> </constraints>
<state key="normal" title="clear"/> <state key="normal" title="clear"/>
<connections>
<action selector="clear:" destination="4MS-jc-i6A" eventType="touchUpInside" id="yW8-Dq-qwU"/>
</connections>
</button> </button>
<imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="2p5-S3-M4T">
<rect key="frame" x="0.0" y="20" width="375" height="211"/>
<constraints>
<constraint firstAttribute="width" secondItem="2p5-S3-M4T" secondAttribute="height" multiplier="16:9" id="9Gh-8L-t3g"/>
</constraints>
</imageView>
</subviews> </subviews>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/> <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints> <constraints>
...@@ -295,20 +300,26 @@ ...@@ -295,20 +300,26 @@
<constraint firstItem="DZa-sd-lY7" firstAttribute="leading" secondItem="pdS-6e-Pd1" secondAttribute="trailing" constant="45" id="8dB-uI-cs9"/> <constraint firstItem="DZa-sd-lY7" firstAttribute="leading" secondItem="pdS-6e-Pd1" secondAttribute="trailing" constant="45" id="8dB-uI-cs9"/>
<constraint firstItem="fAg-ai-yaA" firstAttribute="leading" secondItem="37q-nm-0H7" secondAttribute="trailing" constant="39" id="EAV-Oq-jeD"/> <constraint firstItem="fAg-ai-yaA" firstAttribute="leading" secondItem="37q-nm-0H7" secondAttribute="trailing" constant="39" id="EAV-Oq-jeD"/>
<constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="fAg-ai-yaA" secondAttribute="bottom" constant="27" id="Px0-A9-Eql"/> <constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="fAg-ai-yaA" secondAttribute="bottom" constant="27" id="Px0-A9-Eql"/>
<constraint firstItem="2p5-S3-M4T" firstAttribute="leading" secondItem="vsb-FH-h7h" secondAttribute="leading" id="RNx-6D-oix"/>
<constraint firstItem="pdS-6e-Pd1" firstAttribute="leading" secondItem="fAg-ai-yaA" secondAttribute="trailing" constant="32" id="ZUR-Nv-aNb"/> <constraint firstItem="pdS-6e-Pd1" firstAttribute="leading" secondItem="fAg-ai-yaA" secondAttribute="trailing" constant="32" id="ZUR-Nv-aNb"/>
<constraint firstItem="2p5-S3-M4T" firstAttribute="top" secondItem="vsb-FH-h7h" secondAttribute="top" id="atk-ma-aSA"/>
<constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="pdS-6e-Pd1" secondAttribute="bottom" constant="27" id="kPx-mt-ab9"/> <constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="pdS-6e-Pd1" secondAttribute="bottom" constant="27" id="kPx-mt-ab9"/>
<constraint firstItem="2p5-S3-M4T" firstAttribute="trailing" secondItem="vsb-FH-h7h" secondAttribute="trailing" id="mwX-bu-jJY"/>
<constraint firstItem="37q-nm-0H7" firstAttribute="leading" secondItem="vsb-FH-h7h" secondAttribute="leading" constant="38" id="trH-Fq-sSv"/> <constraint firstItem="37q-nm-0H7" firstAttribute="leading" secondItem="vsb-FH-h7h" secondAttribute="leading" constant="38" id="trH-Fq-sSv"/>
<constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="DZa-sd-lY7" secondAttribute="bottom" constant="27" id="yNJ-hq-2Qg"/> <constraint firstItem="vsb-FH-h7h" firstAttribute="bottom" secondItem="DZa-sd-lY7" secondAttribute="bottom" constant="27" id="yNJ-hq-2Qg"/>
</constraints> </constraints>
<viewLayoutGuide key="safeArea" id="vsb-FH-h7h"/> <viewLayoutGuide key="safeArea" id="vsb-FH-h7h"/>
</view> </view>
<connections>
<outlet property="imageView" destination="2p5-S3-M4T" id="ePO-1L-eb4"/>
</connections>
</viewController> </viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="hGb-Pb-icS" userLabel="First Responder" sceneMemberID="firstResponder"/> <placeholder placeholderIdentifier="IBFirstResponder" id="hGb-Pb-icS" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects> </objects>
<point key="canvasLocation" x="-721" y="-427"/> <point key="canvasLocation" x="-135.19999999999999" y="-218.1409295352324"/>
</scene> </scene>
</scenes> </scenes>
<resources> <resources>
<image name="paddle-mobile.png" width="402" height="62"/> <image name="paddle-mobile" width="402" height="62"/>
</resources> </resources>
</document> </document>
...@@ -17,14 +17,15 @@ import MetalKit ...@@ -17,14 +17,15 @@ import MetalKit
import Foundation import Foundation
import paddle_mobile import paddle_mobile
public class MetalHelper { @objc public class MetalHelper: NSObject {
let device: MTLDevice @objc let device: MTLDevice
let queue: MTLCommandQueue @objc let queue: MTLCommandQueue
let textureLoader: MTKTextureLoader @objc let textureLoader: MTKTextureLoader
static let shared: MetalHelper = MetalHelper.init() @objc static let shared: MetalHelper = MetalHelper.init()
private init(){ private override init(){
device = MTLCreateSystemDefaultDevice()! device = MTLCreateSystemDefaultDevice()!
queue = device.makeCommandQueue()! queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device) textureLoader = MTKTextureLoader.init(device: device)
super.init()
} }
} }
...@@ -30,37 +30,37 @@ class MultiPredictViewController: UIViewController { ...@@ -30,37 +30,37 @@ class MultiPredictViewController: UIViewController {
@IBAction func predictAct(_ sender: Any) { @IBAction func predictAct(_ sender: Any) {
let success = self.runner2.load() let success = self.runner2.load()
// DispatchQueue.global().async { // DispatchQueue.global().async {
let image1 = UIImage.init(named: "hand.jpg") let image1 = UIImage.init(named: "hand.jpg")
// let success = self.runner2.load() // let success = self.runner2.load()
// if success { // if success {
// for i in 0..<10000 { // for i in 0..<10000 {
// print(i) // print(i)
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in // self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result1: ") // print("result1: ")
//// print(res) //// print(res)
// }) // })
// } // }
// } else { // } else {
// print("load failed") // print("load failed")
// } // }
// self.runner1.clear() // self.runner1.clear()
// } // }
// return // return
// DispatchQueue.global().async { // DispatchQueue.global().async {
//// sleep(1) //// sleep(1)
// let image1 = UIImage.init(named: "banana.jpeg") // let image1 = UIImage.init(named: "banana.jpeg")
//// if success { //// if success {
// for _ in 0..<10 { // for _ in 0..<10 {
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in // self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result2: ") // print("result2: ")
// print(res) // print(res)
// }) // })
// } // }
//// } else { //// } else {
//// print("load failed") //// print("load failed")
//// } //// }
//// self.runner2.clear() //// self.runner2.clear()
// } // }
} }
} }
...@@ -22,14 +22,18 @@ public class Genet: Net { ...@@ -22,14 +22,18 @@ public class Genet: Net {
paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! "para null"
preprocessKernel = GenetPreProccess.init(device: device) preprocessKernel = GenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 128, 128, 3]) inputDim = Dim.init(inDim: [1, 128, 128, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
} }
@objc override public init(device: MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { @objc override public init(device: MTLDevice, inParamPointer: UnsafeMutableRawPointer, inParamSize:Int, inModelPointer: UnsafeMutableRawPointer, inModelSize: Int) {
super.init(device: device, super.init(device: device,
paramPointer: paramPointer, inParamPointer: inParamPointer,
paramSize: paramSize, inParamSize: inParamSize,
modePointer: modePointer, inModelPointer: inModelPointer,
modelSize: modelSize) inModelSize: inModelSize)
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
preprocessKernel = GenetPreProccess.init(device: device) preprocessKernel = GenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 128, 128, 3]) inputDim = Dim.init(inDim: [1, 128, 128, 3])
} }
...@@ -41,8 +45,8 @@ public class Genet: Net { ...@@ -41,8 +45,8 @@ public class Genet: Net {
} }
} }
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
return " \(res.result[0]) ... " return " \(res[0].result[0]) ... "
} }
} }
...@@ -43,10 +43,10 @@ public class MobileNet: Net{ ...@@ -43,10 +43,10 @@ public class MobileNet: Net{
let labels = PreWords.init(fileName: "synset") let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
let resPointer = res.result let resPointer = res[0].result
var s: [String] = [] var s: [String] = []
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{ (0..<res[0].capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
} }
return s.joined(separator: "\n") return s.joined(separator: "\n")
...@@ -57,10 +57,12 @@ public class MobileNet: Net{ ...@@ -57,10 +57,12 @@ public class MobileNet: Net{
except = 0 except = 0
modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
// metalLoadMode = .LoadMetalInCustomMetalLib // metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil " // metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetPreProccess.init(device: device) preprocessKernel = MobilenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 224, 224, 3]) inputDim = Dim.init(inDim: [1, 224, 224, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
} }
} }
...@@ -22,12 +22,40 @@ public class MobileNetCombined: Net { ...@@ -22,12 +22,40 @@ public class MobileNetCombined: Net {
modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
inputDim = Dim.init(inDim: [1, 224, 224, 3]) inputDim = Dim.init(inDim: [1, 224, 224, 3])
// metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil " let paddleMobileMetallib = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
metalLibPath = paddleMobileMetallib
useMPS = true
preprocessKernel = ScaleKernel.init(device: device, shape: Shape.init(inWidth: 224, inHeight: 224, inChannel: 3), metalLoadMode: .LoadMetalInCustomMetalLib, metalLibPath: paddleMobileMetallib)
}
let labels = PreWords.init(fileName: "vision_synset")
class PreWords {
var contents: [String] = []
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
}
}else{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String {
return contents[index]
}
} }
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
return " \(res.result[0]) ... " let firstRes = res[0]
let resPointer = firstRes.result
var s: [String] = []
(0..<firstRes.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
} }
} }
...@@ -21,19 +21,19 @@ public class MobileNet_ssd_hand: Net { ...@@ -21,19 +21,19 @@ public class MobileNet_ssd_hand: Net {
except = 2 except = 2
modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null"
// metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil " metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 300, 300, 3]) inputDim = Dim.init(inDim: [1, 300, 300, 3])
} }
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { @objc override public init(device: MTLDevice,inParamPointer: UnsafeMutableRawPointer, inParamSize:Int, inModelPointer inModePointer: UnsafeMutableRawPointer, inModelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) super.init(device:device,inParamPointer:inParamPointer,inParamSize:inParamSize,inModelPointer:inModePointer,inModelSize:inModelSize)
except = 2 except = 2
modelPath = "" modelPath = ""
paramPath = "" paramPath = ""
// metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil " metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 300, 300, 3]) inputDim = Dim.init(inDim: [1, 300, 300, 3])
} }
...@@ -45,51 +45,51 @@ public class MobileNet_ssd_hand: Net { ...@@ -45,51 +45,51 @@ public class MobileNet_ssd_hand: Net {
} }
} }
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
return " \(res)" return " \(res[0])"
} }
override public func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] {
// guard let interRes = paddleMobileRes.intermediateResults else { // guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ") // fatalError(" need have inter result ")
// } // }
// //
// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else { // guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else {
// fatalError(" need score ") // fatalError(" need score ")
// } // }
// //
// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else { // guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else {
// fatalError() // fatalError()
// } // }
// //
// var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3])) // var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3]))
//// print("score: ") //// print("score: ")
//// print(scoreFormatArr.strideArray()) //// print(scoreFormatArr.strideArray())
//// ////
// var bboxArr = bbox.metalTexture.float32Array() // var bboxArr = bbox.metalTexture.float32Array()
//// print("bbox: ") //// print("bbox: ")
//// print(bboxArr.strideArray()) //// print(bboxArr.strideArray())
// //
// let nmsCompute = NMSCompute.init() // let nmsCompute = NMSCompute.init()
// nmsCompute.scoreThredshold = 0.01 // nmsCompute.scoreThredshold = 0.01
// nmsCompute.nmsTopK = 400 // nmsCompute.nmsTopK = 400
// nmsCompute.keepTopK = 200 // nmsCompute.keepTopK = 200
// nmsCompute.nmsEta = 1.0 // nmsCompute.nmsEta = 1.0
// nmsCompute.nmsThreshold = 0.45 // nmsCompute.nmsThreshold = 0.45
// nmsCompute.background_label = 0; // nmsCompute.background_label = 0;
// //
// nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])] // nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])]
// //
// nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])] // nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])]
// guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else { // guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else {
// fatalError( " result error " ) // fatalError( " result error " )
// } // }
// //
// let output: [Float32] = result.map { $0.floatValue } // let output: [Float32] = result.map { $0.floatValue }
// //
// //
// return output // return output
fatalError() fatalError()
} }
......
...@@ -23,13 +23,17 @@ public class MobileNet_ssd_AR: Net { ...@@ -23,13 +23,17 @@ public class MobileNet_ssd_AR: Net {
paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! "para null"
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 160, 160, 3]) inputDim = Dim.init(inDim: [1, 160, 160, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
} }
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { @objc override public init(device: MTLDevice, inParamPointer: UnsafeMutableRawPointer, inParamSize:Int, inModelPointer: UnsafeMutableRawPointer, inModelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) super.init(device:device,inParamPointer:inParamPointer,inParamSize:inParamSize,inModelPointer:inModelPointer,inModelSize:inModelSize)
except = 2 except = 2
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 160, 160, 3]) inputDim = Dim.init(inDim: [1, 160, 160, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
} }
class MobilenetssdPreProccess: CusomKernel { class MobilenetssdPreProccess: CusomKernel {
...@@ -39,110 +43,110 @@ public class MobileNet_ssd_AR: Net { ...@@ -39,110 +43,110 @@ public class MobileNet_ssd_AR: Net {
} }
} }
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
return " \(res.result[0])" return " \(res[0].result[0])"
} }
override public func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { override public func fetchResult(paddleMobileRes: [GPUResultHolder]) -> [ResultHolder] {
fatalError() fatalError()
// guard let interRes = paddleMobileRes.intermediateResults else { // guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ") // fatalError(" need have inter result ")
// } // }
// //
// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else { // guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
// fatalError(" need score ") // fatalError(" need score ")
// } // }
// //
// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else { // guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
// fatalError() // fatalError()
// } // }
// let startDate = Date.init() // let startDate = Date.init()
// print("scoreFormatArr: ") // print("scoreFormatArr: ")
//print((0..<score.capacity).map{ score.result[$0] }.strideArray()) //print((0..<score.capacity).map{ score.result[$0] }.strideArray())
// //
// print("bbox arr: ") // print("bbox arr: ")
// //
// print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray()) // print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
// let nmsCompute = NMSCompute.init() // let nmsCompute = NMSCompute.init()
// nmsCompute.scoreThredshold = 0.25 // nmsCompute.scoreThredshold = 0.25
// nmsCompute.nmsTopK = 100 // nmsCompute.nmsTopK = 100
// nmsCompute.keepTopK = 100 // nmsCompute.keepTopK = 100
// nmsCompute.nmsEta = 1.0 // nmsCompute.nmsEta = 1.0
// nmsCompute.nmsThreshold = 0.449999988 // nmsCompute.nmsThreshold = 0.449999988
// nmsCompute.background_label = 0; // nmsCompute.background_label = 0;
// nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])] // nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
// nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])] // nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
// guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else { // guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
// fatalError( " result error " ) // fatalError( " result error " )
// } // }
// let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize)) // let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
// for i in 0..<Int(result.outputSize) { // for i in 0..<Int(result.outputSize) {
// //
// print("i \(i) : \(result.output[i])") // print("i \(i) : \(result.output[i])")
// } // }
// print(Date.init().timeIntervalSince(startDate)) // print(Date.init().timeIntervalSince(startDate))
// print(resultHolder.result![0]) // print(resultHolder.result![0])
// return resultHolder // return resultHolder
} }
// override func updateProgram(program: Program) { // override func updateProgram(program: Program) {
// for i in [56, 66, 76, 86, 93, 99] { // for i in [56, 66, 76, 86, 93, 99] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first! // let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]! // let v = program.scope[output]!
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
// //
// originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7]) // originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
// //
// originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7]) // originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
// //
// program.scope[output] = originTexture // program.scope[output] = originTexture
// //
// if i == 99 { // if i == 99 {
// opDesc.attrs["axis"] = 0 // opDesc.attrs["axis"] = 0
// } else { // } else {
// opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } // opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
// } // }
// } // }
// //
// for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] { // for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first! // let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]! // let v = program.scope[output]!
// //
// //
// //
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } // opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
// } // }
// //
// for i in [60, 101, 90, 97, 70, 80] { // for i in [60, 101, 90, 97, 70, 80] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first! // let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]! // let v = program.scope[output]!
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 // opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
// } // }
// //
// for i in [102] { // for i in [102] {
// let opDesc = program.programDesc.blocks[0].ops[i] // let opDesc = program.programDesc.blocks[0].ops[i]
// for output in opDesc.outputs["Out"]! { // for output in opDesc.outputs["Out"]! {
// let v = program.scope[output]! // let v = program.scope[output]!
// let originTexture = v as! Texture // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// } // }
// opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 // opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
// print(" split axis \(opDesc.attrs["axis"])") // print(" split axis \(opDesc.attrs["axis"])")
// } // }
// 99 // 99
// } // }
} }
...@@ -20,15 +20,19 @@ public class YoloNet: Net { ...@@ -20,15 +20,19 @@ public class YoloNet: Net {
@objc public override init(device: MTLDevice) { @objc public override init(device: MTLDevice) {
super.init(device: device) super.init(device: device)
except = 0 except = 0
modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "yolo_16_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "yolo_16_param", ofType: nil) ?! "para null"
inputDim = Dim.init(inDim: [1, 416, 416, 3]) inputDim = Dim.init(inDim: [1, 416, 416, 3])
// metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil " metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
useMPS = true
paramPrecision = .Float16
preprocessKernel = ScaleKernel.init(device: device, shape: Shape.init(inWidth: 416, inHeight: 416, inChannel: 3), metalLoadMode: .LoadMetalInCustomMetalLib, metalLibPath: Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib"))
} }
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
return " \(res.result[0]) ... " return " \(res[0].result[0]) ... "
} }
} }
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
#import <UIKit/UIKit.h> #import <UIKit/UIKit.h>
/**
@b 从内存中加载模型 Demo, 可以在 main storyboard 中调整 Demo
*/
@interface LoadPointerViewController : UIViewController @interface LoadPointerViewController : UIViewController
@end @end
...@@ -13,17 +13,20 @@ ...@@ -13,17 +13,20 @@
limitations under the License. */ limitations under the License. */
#import "PaddleMobileGPU.h" #import "PaddleMobileGPU.h"
#import "paddle_mobile_demo-Swift.h"
#import "LoadPointerViewController.h" #import "LoadPointerViewController.h"
#import "paddle-mobile-demo-Bridging-Header.h"
#import <Metal/Metal.h> #import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>
@interface LoadPointerViewController () @interface LoadPointerViewController ()
@property (strong, nonatomic) id<MTLDevice> device; @property (weak, nonatomic) IBOutlet UIImageView *imageView;
@property (assign, nonatomic) BOOL loaded;
@property (strong, nonatomic) id<MTLTexture> texture; @property (strong, nonatomic) id<MTLTexture> texture;
@property (strong, nonatomic) id<MTLCommandQueue> queue;
@property (strong, nonatomic) PaddleMobileGPU *runner; @property (strong, nonatomic) PaddleMobileGPU *paddleMobile;
@property (strong, nonatomic) ModelConfig *modelConfig; @property (strong, nonatomic) ModelConfig *modelConfig;
@end @end
...@@ -33,17 +36,10 @@ ...@@ -33,17 +36,10 @@
- (void)viewDidLoad { - (void)viewDidLoad {
[super viewDidLoad]; [super viewDidLoad];
self.imageView.image = [UIImage imageNamed:@"banana.jpeg"];
self.device = MTLCreateSystemDefaultDevice(); NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"super_model" withExtension:nil].path;
NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"super_params" withExtension:nil].path;
self.queue = [self.device newCommandQueue];
// Do any additional setup after loading the view.
// NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"genet_model" withExtension:nil].path;
// NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"genet_params" withExtension:nil].path;
NSString *modelPath = [[NSBundle mainBundle] URLForResource:@"ar_model" withExtension:nil].path;
NSString *paramPath = [[NSBundle mainBundle] URLForResource:@"ar_params" withExtension:nil].path;
long fileSize; long fileSize;
FILE *fp; FILE *fp;
...@@ -66,114 +62,55 @@ ...@@ -66,114 +62,55 @@
fclose(parmaFilePointer); fclose(parmaFilePointer);
_modelConfig = [[ModelConfig alloc] init]; _modelConfig = [[ModelConfig alloc] init];
// _modelConfig.means = @[[NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0], [NSNumber numberWithFloat:128.0]];
// _modelConfig.scale = 0.017;
// _modelConfig.dims = @[[NSNumber numberWithFloat:1], [NSNumber numberWithFloat:128.], [NSNumber numberWithFloat:128.0],[NSNumber numberWithFloat:3.0]];
_modelConfig.means = @[[NSNumber numberWithFloat:103.94], [NSNumber numberWithFloat:116.78], [NSNumber numberWithFloat:123.68]];
_modelConfig.scale = 1;
_modelConfig.dims = @[[NSNumber numberWithFloat:1], [NSNumber numberWithFloat:160.], [NSNumber numberWithFloat:160.0],[NSNumber numberWithFloat:3.0]];
_modelConfig.modelPointer = buffer; _modelConfig.modelPointer = buffer;
_modelConfig.modelSize = (int)fileSize; _modelConfig.modelSize = (int)fileSize;
_modelConfig.paramPointer = parmaBuffer; _modelConfig.paramPointer = parmaBuffer;
_modelConfig.paramSize = (int)paramfileSize; _modelConfig.paramSize = (int)paramfileSize;
} }
- (IBAction)loaderButtonPressed:(id)sender { - (IBAction)loaderButtonPressed:(id)sender {
// _runner = [[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:GenetType modelConfig:_modelConfig]; self.paddleMobile = [[PaddleMobileGPU alloc] initWithCommandQueue:MetalHelper.shared.queue net:SuperResolutionNetType modelConfig:_modelConfig];
_runner = [[PaddleMobileGPU alloc] initWithCommandQueue:self.queue net:MobileNetSSDType modelConfig:_modelConfig]; _loaded = [self.paddleMobile load];
NSLog(@" load 结果: %@", _loaded ? @"成功" : @"失败");
[_runner load];
} }
- (IBAction)predictButtonPressed:(id)sender { - (IBAction)predictButtonPressed:(id)sender {
[self predict]; [self predict];
} }
- (id<MTLTexture>) createTextureFromImage:(UIImage*) image device:(id<MTLDevice>) device - (void)predict {
{ UIImage *image = self.imageView.image;
image =[UIImage imageWithCGImage:[image CGImage] if (!image) {
scale:[image scale] NSLog(@" image is nil");
orientation: UIImageOrientationLeft]; return;
}
NSLog(@"orientation and size and stuff %ld %f %f", (long)image.imageOrientation, image.size.width, image.size.height); id<MTLTexture> texture = [MetalHelper.shared.textureLoader newTextureWithCGImage:image.CGImage options:nil error:nil];
_texture = texture;
CGImageRef imageRef = image.CGImage; if (!_texture) {
NSLog(@" texture is nil");
size_t width = self.view.frame.size.width; return;
size_t height = self.view.frame.size.height;
size_t bitsPerComponent = CGImageGetBitsPerComponent(imageRef);
size_t bitsPerPixel = CGImageGetBitsPerPixel(imageRef);
CGColorSpaceRef colorSpace = CGImageGetColorSpace(imageRef);
CGImageAlphaInfo alphaInfo = CGImageGetAlphaInfo(imageRef);
// NSLog(@"%@ %u", colorSpace, alphaInfo);
CGBitmapInfo bitmapInfo = kCGBitmapByteOrderDefault | alphaInfo;
// NSLog(@"bitmap info %u", bitmapInfo);
CGContextRef context = CGBitmapContextCreate( NULL, width, height, bitsPerComponent, (bitsPerPixel / 8) * width, colorSpace, bitmapInfo);
if( !context )
{
NSLog(@"Failed to load image, probably an unsupported texture type");
return nil;
} }
CGContextDrawImage( context, CGRectMake( 0, 0, width, height ), image.CGImage); if (!self.loaded) {
NSLog(@" not load ");
return;
MTLPixelFormat format = MTLPixelFormatRGBA8Unorm; }
MTLTextureDescriptor *texDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format
width:width
height:height
mipmapped:NO];
id<MTLTexture> texture = [device newTextureWithDescriptor:texDesc];
[texture replaceRegion:MTLRegionMake2D(0, 0, width, height)
mipmapLevel:0
withBytes:CGBitmapContextGetData(context)
bytesPerRow:4 * width];
return texture;
}
- (void)predict {
_texture = [self createTextureFromImage:[UIImage imageNamed:@"hand.jpg"] device:self.device];
NSTimeInterval startTime = [[NSDate date] timeIntervalSince1970]; NSTimeInterval startTime = [[NSDate date] timeIntervalSince1970];
NSInteger max = 428; NSInteger max = 1;
for (int i = 0;i < max; i ++) { for (int i = 0;i < max; i ++) {
[_runner predict:_texture withCompletion:^(BOOL success , NSArray<NSNumber *> *result) { [self.paddleMobile predict:_texture withCompletion:^(BOOL success , NSArray<NSNumber *> *result) {
if (success) { if (success) {
if (i == max -1) { if (i == max -1) {
double time = [[NSDate date] timeIntervalSince1970] - startTime; double time = [[NSDate date] timeIntervalSince1970] - startTime;
time = (time/max)*1000; time = (time/max)*1000;
NSLog(@"gap ==== %fms",time); NSLog(@"gap ==== %fms",time);
} }
// for (int i = 0; i < result.count; i ++) {
// NSNumber *number = result[i];
// NSLog(@"result %d = %f:",i, [number floatValue]);
// }
} }
}]; }];
} }
} }
- (IBAction)clear:(id)sender {
- (void)didReceiveMemoryWarning { [self.paddleMobile clear];
[super didReceiveMemoryWarning]; self.loaded = NO;
// Dispose of any resources that can be recreated.
}
/*
#pragma mark - Navigation
// In a storyboard-based application, you will often want to do a little preparation before navigation
- (void)prepareForSegue:(UIStoryboardSegue *)segue sender:(id)sender {
// Get the new view controller using [segue destinationViewController].
// Pass the selected object to the new view controller.
} }
*/
@end @end
...@@ -26,6 +26,8 @@ typedef enum : NSUInteger { ...@@ -26,6 +26,8 @@ typedef enum : NSUInteger {
@property (assign, nonatomic) int outputSize; @property (assign, nonatomic) int outputSize;
@property (strong, nonatomic) NSArray <NSNumber *>*dim;
-(void)releaseOutput; -(void)releaseOutput;
@end @end
...@@ -88,13 +90,13 @@ typedef enum : NSUInteger { ...@@ -88,13 +90,13 @@ typedef enum : NSUInteger {
* texture: 需要进行预测的图像转换的 texture * texture: 需要进行预测的图像转换的 texture
* completion: 预测完成回调 * completion: 预测完成回调
*/ */
-(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSNumber *> *))completion; -(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSArray <NSNumber *>*> *))completion;
/* /*
* texture: 需要进行预测的图像转换的 texture * texture: 需要进行预测的图像转换的 texture
* completion: 预测完成回调 * completion: 预测完成回调
*/ */
-(void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion; -(void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, NSArray <PaddleMobileGPUResult *> *))completion;
/* /*
* 清理内存 * 清理内存
......
...@@ -53,9 +53,9 @@ ...@@ -53,9 +53,9 @@
if (self) { if (self) {
Net *net = nil; Net *net = nil;
if (netType == SuperResolutionNetType) { if (netType == SuperResolutionNetType) {
net = [[SuperResolutionNet alloc] initWithDevice:queue.device]; net = [[SuperResolutionNet alloc] initWithDevice:queue.device inParamPointer:config.paramPointer inParamSize:config.paramSize inModelPointer:config.modelPointer inModelSize:config.modelSize];
} else if (netType == MobileNetSSDType) { } else if (netType == MobileNetSSDType) {
net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize]; net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device inParamPointer:config.paramPointer inParamSize:config.paramSize inModelPointer:config.modelPointer inModelSize:config.modelSize];
} }
runner = [[Runner alloc] initInNet:net commandQueue:queue]; runner = [[Runner alloc] initInNet:net commandQueue:queue];
} }
...@@ -66,24 +66,34 @@ ...@@ -66,24 +66,34 @@
return [runner load]; return [runner load];
} }
-(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSNumber *> *))completion { -(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSArray <NSNumber *>*> *))completion {
[runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) { [runner predictWithTexture:texture completion:^(BOOL success, NSArray<ResultHolder *> * _Nullable resultArr) {
NSMutableArray<NSNumber *> *resultArray = [NSMutableArray arrayWithCapacity:result.capacity]; NSMutableArray<NSMutableArray <NSNumber *>*> *ocResultArray = [NSMutableArray arrayWithCapacity:resultArr.count];
for (int i = 0; i < result.capacity; ++i) { for (int i = 0; i < resultArr.count; ++i) {
[resultArray addObject:[NSNumber numberWithFloat:result.result[i]]]; ResultHolder *resultHolder = resultArr[i];
NSMutableArray <NSNumber *>*res = [NSMutableArray arrayWithCapacity:resultHolder.capacity];
for (int j = 0; j < resultHolder.capacity; ++j) {
// Copy element j of this holder's buffer. Indexing with the outer loop
// variable i would store the same value `capacity` times and may read
// outside the buffer whenever i >= capacity.
[res addObject:[NSNumber numberWithFloat:resultHolder.result[j]]];
} }
completion(success, resultArray); [ocResultArray addObject:res];
[result releasePointer]; [resultHolder releasePointer];
}
completion(success, ocResultArray);
}]; }];
} }
-(void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, PaddleMobileGPUResult *))completion { -(void)predict:(id<MTLTexture>)texture withResultCompletion:(void (^)(BOOL, NSArray <PaddleMobileGPUResult *> *))completion {
[runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) { [runner predictWithTexture:texture completion:^(BOOL success, NSArray<ResultHolder *> * _Nullable resultArr) {
NSMutableArray <PaddleMobileGPUResult *> *ocResultArr = [NSMutableArray arrayWithCapacity:resultArr.count];
for (int i = 0; i < resultArr.count; ++i) {
ResultHolder *result = resultArr[i];
PaddleMobileGPUResult *gpuResult = [[PaddleMobileGPUResult alloc] init]; PaddleMobileGPUResult *gpuResult = [[PaddleMobileGPUResult alloc] init];
gpuResult.dim = result.dim;
[gpuResult setOutputResult:result]; [gpuResult setOutputResult:result];
completion(success, gpuResult); [ocResultArr addObject:gpuResult];
}
completion(success, ocResultArr);
}]; }];
} }
......
...@@ -16,10 +16,22 @@ import Foundation ...@@ -16,10 +16,22 @@ import Foundation
import paddle_mobile import paddle_mobile
@objc public class SuperResolutionNet: Net{ @objc public class SuperResolutionNet: Net{
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: [ResultHolder]) -> String {
return "未实现" return "未实现"
} }
public override init(device: MTLDevice, inParamPointer: UnsafeMutableRawPointer, inParamSize: Int, inModelPointer: UnsafeMutableRawPointer, inModelSize: Int) {
super.init(device: device)
except = 0
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
inputDim = Dim.init(inDim: [1, 224, 224, 3])
self.paramPointer = inParamPointer
self.paramSize = inParamSize
self.modelPointer = inModelPointer
self.modelSize = inModelSize
}
@objc override public init(device: MTLDevice) { @objc override public init(device: MTLDevice) {
super.init(device: device) super.init(device: device)
except = 0 except = 0
...@@ -27,8 +39,8 @@ import paddle_mobile ...@@ -27,8 +39,8 @@ import paddle_mobile
paramPath = Bundle.main.path(forResource: "super_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "super_params", ofType: nil) ?! "para null"
preprocessKernel = nil preprocessKernel = nil
inputDim = Dim.init(inDim: [1, 224, 224, 1]) inputDim = Dim.init(inDim: [1, 224, 224, 1])
// metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil " metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
} }
override public func updateProgram(program: Program) { override public func updateProgram(program: Program) {
......
...@@ -14,7 +14,7 @@ import AVFoundation ...@@ -14,7 +14,7 @@ import AVFoundation
/** /**
Simple interface to the iPhone's camera. Simple interface to the iPhone's camera.
*/ */
@available(iOS 10.0, *) @available(iOS 10.0, *)
public class VideoCapture: NSObject { public class VideoCapture: NSObject {
public var previewLayer: AVCaptureVideoPreviewLayer? public var previewLayer: AVCaptureVideoPreviewLayer?
......
...@@ -89,26 +89,28 @@ class ViewController: UIViewController { ...@@ -89,26 +89,28 @@ class ViewController: UIViewController {
@IBAction func loadAct(_ sender: Any) { @IBAction func loadAct(_ sender: Any) {
runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue) runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue)
if platform == .GPU { if platform == .GPU {
// let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil) // let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil)
// let fileReader = try! FileReader.init(paramPath: filePath!) // let fileReader = try! FileReader.init(paramPath: filePath!)
// let pointer: UnsafeMutablePointer<Float32> = fileReader.read() // let pointer: UnsafeMutablePointer<Float32> = fileReader.read()
// //
// //
// let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared) // let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared)
// //
// buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize) // buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize)
if self.toPredictTexture == nil { if self.toPredictTexture == nil {
let beforeDate = Date.init()
// runner.getTexture(inBuffer: buffer!) { [weak self] (texture) in if modelType == .mobilenet_combined || modelType == .yolo {
// self?.toPredictTexture = texture self.toPredictTexture = try! MetalHelper.shared.textureLoader.newTexture(cgImage: selectImage!.cgImage!, options: nil)
// } } else {
runner.getTexture(image: selectImage!.cgImage!) { [weak self] (texture) in runner.getTexture(image: selectImage!.cgImage!) { [weak self] (texture) in
let timeUse = Date.init().timeIntervalSince(beforeDate)
print("get texture time use: \(timeUse)")
self?.toPredictTexture = texture self?.toPredictTexture = texture
} }
} }
}
} else { } else {
fatalError( " unsupport " ) fatalError( " unsupport " )
} }
...@@ -147,7 +149,8 @@ class ViewController: UIViewController { ...@@ -147,7 +149,8 @@ class ViewController: UIViewController {
fatalError() fatalError()
} }
if success, let inResultHolder = resultHolder { if success, let inResultHolderArr = resultHolder {
let inResultHolder = inResultHolderArr[0]
if i == max - 1 { if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate) let time = Date.init().timeIntervalSince(startDate)
...@@ -160,7 +163,7 @@ class ViewController: UIViewController { ...@@ -160,7 +163,7 @@ class ViewController: UIViewController {
} }
DispatchQueue.main.async { DispatchQueue.main.async {
resultHolder?.releasePointer() resultHolder?.first?.releasePointer()
} }
} }
} }
...@@ -170,42 +173,19 @@ class ViewController: UIViewController { ...@@ -170,42 +173,19 @@ class ViewController: UIViewController {
override func viewDidLoad() { override func viewDidLoad() {
super.viewDidLoad() super.viewDidLoad()
GlobalConfig.shared.computePrecision = .Float16
GlobalConfig.shared.debug = false
modelPickerView.delegate = self modelPickerView.delegate = self
modelPickerView.dataSource = self modelPickerView.dataSource = self
threadPickerView.delegate = self threadPickerView.delegate = self
threadPickerView.dataSource = self threadPickerView.dataSource = self
if let image = UIImage.init(named: "classify-img-output.png") { if let image = UIImage.init(named: "00001.jpg") {
selectImage = image selectImage = image
selectImageView.image = image selectImageView.image = image
} else { } else {
print("请添加测试图片") print("请添加测试图片")
} }
GlobalConfig.shared.computePrecision = .Float32
// if platform == .CPU {
// inputPointer = runner.preproccess(image: selectImage!.cgImage!)
// } else if platform == .GPU {
// runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// self?.toPredictTexture = texture
// }
// } else {
// fatalError( " unsupport " )
// }
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
} }
} }
...@@ -271,8 +251,7 @@ extension ViewController: VideoCaptureDelegate{ ...@@ -271,8 +251,7 @@ extension ViewController: VideoCaptureDelegate{
func predictTexture(texture: MTLTexture){ func predictTexture(texture: MTLTexture){
runner.scaleTexture(input: texture) { (scaledTexture) in runner.scaleTexture(input: texture) { (scaledTexture) in
self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in
// print(resultHolder!.result![0]) resultHolder?.first?.releasePointer()
resultHolder?.releasePointer()
}) })
} }
} }
......
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:paddle-mobile-metallib.xcodeproj">
</FileRef>
</Workspace>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1010"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FCC15D5F221E66DE00DC3CB2"
BuildableName = "paddle-mobile-metallib.metallib"
BlueprintName = "paddle-mobile-metallib"
ReferencedContainer = "container:paddle-mobile-metallib.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Release"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FCC15D5F221E66DE00DC3CB2"
BuildableName = "paddle-mobile-metallib.metallib"
BlueprintName = "paddle-mobile-metallib"
ReferencedContainer = "container:paddle-mobile-metallib.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FCC15D5F221E66DE00DC3CB2"
BuildableName = "paddle-mobile-metallib.metallib"
BlueprintName = "paddle-mobile-metallib"
ReferencedContainer = "container:paddle-mobile-metallib.xcodeproj">
</BuildableReference>
</MacroExpansion>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
//
// BatchNormRelu.metal
// paddle-mobile
//
#include <metal_stdlib>
using namespace metal;
// Parameter block for convolution-style kernels: three signed offsets and two
// unsigned strides. Field order and types define the memory layout of the
// struct, so they must stay in step with whatever fills the buffer.
// NOTE(review): presumably populated by the host (Swift) side — confirm the
// layout matches there before reordering or resizing any field.
struct MetalConvParam {
// Signed offsets along x, y and z.
short offsetX;
short offsetY;
short offsetZ;
// Unsigned strides along x and y.
ushort strideX;
ushort strideY;
};
// Scale-and-shift followed by ReLU:
//   out = max(in * new_scale[slice] + new_biase[slice], 0)
//
// One thread processes the texel at gid.xy in array slice gid.z.
//   inTexture  - source texture array (sampled, unnormalised pixel coordinates)
//   outTexture - destination texture array; its dimensions bound the grid
//   new_scale  - one float4 multiplier per array slice
//   new_biase  - one float4 addend per array slice
kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                texture2d_array<float, access::write> outTexture [[texture(1)]],
                                const device float4 *new_scale [[buffer(0)]],
                                const device float4 *new_biase [[buffer(1)]],
                                uint3 gid [[thread_position_in_grid]]) {
  // Threads dispatched beyond the output extent have nothing to write.
  if (gid.x >= outTexture.get_width() ||
      gid.y >= outTexture.get_height() ||
      gid.z >= outTexture.get_array_size()) {
    return;
  }

  constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

  // texture2d_array::sample expects (sampler, float2 coord, uint slice).
  // Passing gid.x, gid.y, gid.z as three scalars would use x as the whole
  // coordinate and y as the slice, so build the coordinate from gid.xy and
  // read from the same slice that the scale/bias and the write use.
  const float4 input = inTexture.sample(sample, float2(gid.xy), gid.z);
  const float4 output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
  outTexture.write(output, gid.xy, gid.z);
}
...@@ -41,129 +41,129 @@ struct ConcatParam { ...@@ -41,129 +41,129 @@ struct ConcatParam {
// ssd-ar: (R=3, N=5, V=x) // ssd-ar: (R=3, N=5, V=x)
#define V VX #define V VX
#define R 3 #define R 3
#define N 5 #define N 5
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd-ar: (R=2, N=5, V=x) // ssd-ar: (R=2, N=5, V=x)
#define V VX #define V VX
#define R 2 #define R 2
#define N 5 #define N 5
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd-ar: (R=3, N=2, V=y) // ssd-ar: (R=3, N=2, V=y)
#define V VY #define V VY
#define R 3 #define R 3
#define N 2 #define N 2
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd-ar: (R=4, N=3, V=z) // ssd-ar: (R=4, N=3, V=z)
#define V VZ #define V VZ
#define R 4 #define R 4
#define N 3 #define N 3
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd: (R=2, N=6, V=y) // ssd: (R=2, N=6, V=y)
#define V VY #define V VY
#define R 2 #define R 2
#define N 6 #define N 6
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
// ssd: (R=3, N=6, V=y) // ssd: (R=3, N=6, V=y)
#define V VY #define V VY
#define R 3 #define R 3
#define N 6 #define N 6
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
#define V VNORMAL #define V VNORMAL
#define R 4 #define R 4
#define N 2 #define N 2
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
#define V VY #define V VY
#define R 2 #define R 2
#define N 2 #define N 2
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
#define V VY #define V VY
#define R 2 #define R 2
#define N 5 #define N 5
#define P float #define P float
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "ConcatKernel.inc.metal" #include "ConcatKernel.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// Half-precision 1x1 convolution fused with bias add, scale/shift and ReLU.
//
// Each thread produces the half4 texel at gid.xy of output slice gid.z.
// Weights are read as half4 entries at
//   (gid.z * 4 + component) * inputSlices + inputSlice
// i.e. one run of `inputSlices` entries per output component.
// Accumulation is done in float and converted to half on write.
kernel void conv_add_batch_norm_relu_1x1_half(
    texture2d_array<half, access::sample> inTexture [[texture(0)]],
    texture2d_array<half, access::write> outTexture [[texture(1)]],
    constant MetalConvParam &param [[buffer(0)]],
    const device half4 *weights [[buffer(1)]],
    const device half4 *biase [[buffer(2)]],
    const device half4 *new_scale [[buffer(3)]],
    const device half4 *new_biase [[buffer(4)]],
    uint3 gid [[thread_position_in_grid]]) {
  // Skip threads that fall outside the output texture.
  const bool outOfRange = gid.x >= outTexture.get_width() ||
                          gid.y >= outTexture.get_height() ||
                          gid.z >= outTexture.get_array_size();
  if (outOfRange) {
    return;
  }

  constexpr sampler texSampler(coord::pixel, filter::nearest, address::clamp_to_zero);

  // Map the output position to the input position: pos * stride + offset.
  const ushort2 step = ushort2(param.strideX, param.strideY);
  const ushort2 srcPos = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
  const float2 srcCoord = float2(srcPos.x, srcPos.y);

  const uint kTaps = 1;
  const uint sliceCount = inTexture.get_array_size();
  const uint perComponent = kTaps * sliceCount;
  const uint weightBase = gid.z * kTaps * sliceCount * 4;

  float4 acc = float4(0.0);
  for (uint slice = 0; slice < sliceCount; ++slice) {
    const half4 texel = inTexture.sample(texSampler, srcCoord, slice);
    acc.x += dot(texel, weights[weightBase + 0 * perComponent + slice]);
    acc.y += dot(texel, weights[weightBase + 1 * perComponent + slice]);
    acc.z += dot(texel, weights[weightBase + 2 * perComponent + slice]);
    acc.w += dot(texel, weights[weightBase + 3 * perComponent + slice]);
  }

  // (conv + bias) * scale + shift, clamped at zero (ReLU).
  const float4 bias = float4(biase[gid.z]);
  const float4 scale = float4(new_scale[gid.z]);
  const float4 shift = float4(new_biase[gid.z]);
  acc = fmax((acc + bias) * scale + shift, 0.0);
  outTexture.write(half4(acc), gid.xy, gid.z);
}
// Half-precision 3x3 convolution fused with bias add, scale/shift and ReLU.
//
// Each thread produces the half4 texel at gid.xy of output slice gid.z.
// For every input slice the 3x3 neighbourhood around the mapped input
// position is sampled (row-major, top-left first) and dotted with the
// matching weights. Weights are read as half4 entries at
//   ((gid.z * 4 + component) * 9 + tap) * inputSlices + inputSlice.
// NOTE(review): the input position is held as ushort2; if the offset fields
// can be negative the sum wraps before the -1/+1 tap offsets are applied —
// confirm against the code that fills `param`.
kernel void conv_add_batch_norm_relu_3x3_half(
    texture2d_array<half, access::sample> inTexture [[texture(0)]],
    texture2d_array<half, access::write> outTexture [[texture(1)]],
    constant MetalConvParam &param [[buffer(0)]],
    const device half4 *weights [[buffer(1)]],
    const device half4 *biase [[buffer(2)]],
    const device half4 *new_scale [[buffer(3)]],
    const device half4 *new_biase [[buffer(4)]],
    uint3 gid [[thread_position_in_grid]]) {
  // Skip threads that fall outside the output texture.
  const bool outOfRange = gid.x >= outTexture.get_width() ||
                          gid.y >= outTexture.get_height() ||
                          gid.z >= outTexture.get_array_size();
  if (outOfRange) {
    return;
  }

  constexpr sampler texSampler(coord::pixel, filter::nearest, address::clamp_to_zero);

  // Map the output position to the centre of the input window.
  const ushort2 step = ushort2(param.strideX, param.strideY);
  const ushort2 srcPos = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);

  const uint kTaps = 9;
  const uint sliceCount = inTexture.get_array_size();
  const uint perComponent = kTaps * sliceCount;
  const uint weightBase = gid.z * kTaps * sliceCount * 4;

  float4 acc = float4(0.0);
  half4 window[9];
  for (uint slice = 0; slice < sliceCount; ++slice) {
    // Gather the 3x3 window: index = (dy + 1) * 3 + (dx + 1).
    for (int dy = -1; dy <= 1; ++dy) {
      for (int dx = -1; dx <= 1; ++dx) {
        window[(dy + 1) * 3 + (dx + 1)] =
            inTexture.sample(texSampler, float2(srcPos.x + dx, srcPos.y + dy), slice);
      }
    }

    for (uint tap = 0; tap < kTaps; ++tap) {
      const uint tapOffset = tap * sliceCount + slice;
      acc.x += dot(window[tap], weights[weightBase + 0 * perComponent + tapOffset]);
      acc.y += dot(window[tap], weights[weightBase + 1 * perComponent + tapOffset]);
      acc.z += dot(window[tap], weights[weightBase + 2 * perComponent + tapOffset]);
      acc.w += dot(window[tap], weights[weightBase + 3 * perComponent + tapOffset]);
    }
  }

  // (conv + bias) * scale + shift, clamped at zero (ReLU).
  const float4 bias = float4(biase[gid.z]);
  const float4 scale = float4(new_scale[gid.z]);
  const float4 shift = float4(new_biase[gid.z]);
  acc = fmax((acc + bias) * scale + shift, 0.0);
  outTexture.write(half4(acc), gid.xy, gid.z);
}
// Half-precision depthwise 3x3 convolution fused with bias add, scale/shift
// and ReLU.
//
// Each thread produces the half4 texel at gid.xy of slice gid.z, reading only
// the same slice of the input. Every component of the texel has its own nine
// scalar weights, stored at
//   (gid.z * 4 + component) * 9 + tap.
kernel void depthwise_conv_add_batch_norm_relu_3x3_half(
    texture2d_array<half, access::sample> inTexture [[texture(0)]],
    texture2d_array<half, access::write> outTexture [[texture(1)]],
    constant MetalConvParam &param [[buffer(0)]],
    const device half *weights [[buffer(1)]],
    const device half4 *biase [[buffer(2)]],
    const device half4 *new_scale [[buffer(3)]],
    const device half4 *new_biase [[buffer(4)]],
    uint3 gid [[thread_position_in_grid]]) {
  // Skip threads that fall outside the output texture.
  const bool outOfRange = gid.x >= outTexture.get_width() ||
                          gid.y >= outTexture.get_height() ||
                          gid.z >= outTexture.get_array_size();
  if (outOfRange) {
    return;
  }

  constexpr sampler texSampler(coord::pixel, filter::nearest, address::clamp_to_zero);

  const uint slice = gid.z;

  // Map the output position to the centre of the input window.
  const ushort2 step = ushort2(param.strideX, param.strideY);
  const ushort2 srcPos = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);

  // Gather the 3x3 window row by row: index = (dy + 1) * 3 + (dx + 1).
  half4 window[9];
  for (int dy = -1; dy <= 1; ++dy) {
    for (int dx = -1; dx <= 1; ++dx) {
      window[(dy + 1) * 3 + (dx + 1)] =
          inTexture.sample(texSampler, float2(srcPos.x + dx, srcPos.y + dy), slice);
    }
  }

  const uint kTaps = 9;
  const uint weightBase = gid.z * kTaps * 4;

  float4 acc = float4(0.0);
  for (uint tap = 0; tap < kTaps; ++tap) {
    const half4 texel = window[tap];
    acc.x += texel.x * weights[weightBase + 0 * kTaps + tap];
    acc.y += texel.y * weights[weightBase + 1 * kTaps + tap];
    acc.z += texel.z * weights[weightBase + 2 * kTaps + tap];
    acc.w += texel.w * weights[weightBase + 3 * kTaps + tap];
  }

  // (conv + bias) * scale + shift, clamped at zero (ReLU).
  const float4 bias = float4(biase[gid.z]);
  const float4 scale = float4(new_scale[gid.z]);
  const float4 shift = float4(new_biase[gid.z]);
  acc = fmax((acc + bias) * scale + shift, 0.0);
  outTexture.write(half4(acc), gid.xy, gid.z);
}
/*---------------------------------------------*/
// Single-precision 1x1 convolution fused with bias add, scale/shift and ReLU.
//
// Each thread produces the float4 texel at gid.xy of output slice gid.z.
// Weights are read as float4 entries at
//   (gid.z * 4 + component) * inputSlices + inputSlice.
kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
                                         constant MetalConvParam &param [[buffer(0)]],
                                         const device float4 *weights [[buffer(1)]],
                                         const device float4 *biase [[buffer(2)]],
                                         const device float4 *new_scale [[buffer(3)]],
                                         const device float4 *new_biase [[buffer(4)]],
                                         uint3 gid [[thread_position_in_grid]]) {
  // Skip threads that fall outside the output texture.
  const bool outOfRange = gid.x >= outTexture.get_width() ||
                          gid.y >= outTexture.get_height() ||
                          gid.z >= outTexture.get_array_size();
  if (outOfRange) {
    return;
  }

  constexpr sampler texSampler(coord::pixel, filter::nearest, address::clamp_to_zero);

  // Map the output position to the input position: pos * stride + offset.
  const ushort2 step = ushort2(param.strideX, param.strideY);
  const ushort2 srcPos = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
  const float2 srcCoord = float2(srcPos.x, srcPos.y);

  const uint kTaps = 1;
  const uint sliceCount = inTexture.get_array_size();
  const uint perComponent = kTaps * sliceCount;
  const uint weightBase = gid.z * kTaps * sliceCount * 4;

  float4 acc = float4(0.0);
  for (uint slice = 0; slice < sliceCount; ++slice) {
    const float4 texel = inTexture.sample(texSampler, srcCoord, slice);
    acc.x += dot(texel, weights[weightBase + 0 * perComponent + slice]);
    acc.y += dot(texel, weights[weightBase + 1 * perComponent + slice]);
    acc.z += dot(texel, weights[weightBase + 2 * perComponent + slice]);
    acc.w += dot(texel, weights[weightBase + 3 * perComponent + slice]);
  }

  // (conv + bias) * scale + shift, clamped at zero (ReLU).
  acc = fmax((acc + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
  outTexture.write(acc, gid.xy, gid.z);
}
// Single-precision 3x3 convolution fused with bias add, scale/shift and ReLU.
//
// Each thread produces the float4 texel at gid.xy of output slice gid.z.
// For every input slice the 3x3 neighbourhood around the mapped input
// position is sampled (row-major, top-left first) and dotted with the
// matching weights. Weights are read as float4 entries at
//   ((gid.z * 4 + component) * 9 + tap) * inputSlices + inputSlice.
// NOTE(review): the input position is held as ushort2; if the offset fields
// can be negative the sum wraps before the -1/+1 tap offsets are applied —
// confirm against the code that fills `param`.
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
                                         constant MetalConvParam &param [[buffer(0)]],
                                         const device float4 *weights [[buffer(1)]],
                                         const device float4 *biase [[buffer(2)]],
                                         const device float4 *new_scale [[buffer(3)]],
                                         const device float4 *new_biase [[buffer(4)]],
                                         uint3 gid [[thread_position_in_grid]]) {
  // Skip threads that fall outside the output texture.
  const bool outOfRange = gid.x >= outTexture.get_width() ||
                          gid.y >= outTexture.get_height() ||
                          gid.z >= outTexture.get_array_size();
  if (outOfRange) {
    return;
  }

  constexpr sampler texSampler(coord::pixel, filter::nearest, address::clamp_to_zero);

  // Map the output position to the centre of the input window.
  const ushort2 step = ushort2(param.strideX, param.strideY);
  const ushort2 srcPos = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);

  const uint kTaps = 9;
  const uint sliceCount = inTexture.get_array_size();
  const uint perComponent = kTaps * sliceCount;
  const uint weightBase = gid.z * kTaps * sliceCount * 4;

  float4 acc = float4(0.0);
  float4 window[9];
  for (uint slice = 0; slice < sliceCount; ++slice) {
    // Gather the 3x3 window: index = (dy + 1) * 3 + (dx + 1).
    for (int dy = -1; dy <= 1; ++dy) {
      for (int dx = -1; dx <= 1; ++dx) {
        window[(dy + 1) * 3 + (dx + 1)] =
            inTexture.sample(texSampler, float2(srcPos.x + dx, srcPos.y + dy), slice);
      }
    }

    for (uint tap = 0; tap < kTaps; ++tap) {
      const uint tapOffset = tap * sliceCount + slice;
      acc.x += dot(window[tap], weights[weightBase + 0 * perComponent + tapOffset]);
      acc.y += dot(window[tap], weights[weightBase + 1 * perComponent + tapOffset]);
      acc.z += dot(window[tap], weights[weightBase + 2 * perComponent + tapOffset]);
      acc.w += dot(window[tap], weights[weightBase + 3 * perComponent + tapOffset]);
    }
  }

  // (conv + bias) * scale + shift, clamped at zero (ReLU).
  acc = fmax((acc + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
  outTexture.write(acc, gid.xy, gid.z);
}
// Single-precision depthwise 3x3 convolution fused with bias add, scale/shift
// and ReLU.
//
// Each thread produces the float4 texel at gid.xy of slice gid.z, reading only
// the same slice of the input. Every component of the texel has its own nine
// scalar weights, stored at
//   (gid.z * 4 + component) * 9 + tap.
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                                   texture2d_array<float, access::write> outTexture [[texture(1)]],
                                                   constant MetalConvParam &param [[buffer(0)]],
                                                   const device float *weights [[buffer(1)]],
                                                   const device float4 *biase [[buffer(2)]],
                                                   const device float4 *new_scale [[buffer(3)]],
                                                   const device float4 *new_biase [[buffer(4)]],
                                                   uint3 gid [[thread_position_in_grid]]) {
  // Skip threads that fall outside the output texture.
  const bool outOfRange = gid.x >= outTexture.get_width() ||
                          gid.y >= outTexture.get_height() ||
                          gid.z >= outTexture.get_array_size();
  if (outOfRange) {
    return;
  }

  constexpr sampler texSampler(coord::pixel, filter::nearest, address::clamp_to_zero);

  const uint slice = gid.z;

  // Map the output position to the centre of the input window.
  const ushort2 step = ushort2(param.strideX, param.strideY);
  const ushort2 srcPos = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);

  // Gather the 3x3 window row by row: index = (dy + 1) * 3 + (dx + 1).
  float4 window[9];
  for (int dy = -1; dy <= 1; ++dy) {
    for (int dx = -1; dx <= 1; ++dx) {
      window[(dy + 1) * 3 + (dx + 1)] =
          inTexture.sample(texSampler, float2(srcPos.x + dx, srcPos.y + dy), slice);
    }
  }

  const uint kTaps = 9;
  const uint weightBase = gid.z * kTaps * 4;

  float4 acc = float4(0.0);
  for (uint tap = 0; tap < kTaps; ++tap) {
    const float4 texel = window[tap];
    acc.x += texel.x * weights[weightBase + 0 * kTaps + tap];
    acc.y += texel.y * weights[weightBase + 1 * kTaps + tap];
    acc.z += texel.z * weights[weightBase + 2 * kTaps + tap];
    acc.w += texel.w * weights[weightBase + 3 * kTaps + tap];
  }

  // (conv + bias) * scale + shift, clamped at zero (ReLU).
  acc = fmax((acc + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
  outTexture.write(acc, gid.xy, gid.z);
}
...@@ -18,45 +18,45 @@ using namespace metal; ...@@ -18,45 +18,45 @@ using namespace metal;
#define P float #define P float
#define PRELU_CHANNEL prelu_channel #define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel #define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_CHANNEL #undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element #define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element #define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_ELEMENT #undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other #define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other #define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_OTHER #undef PRELU_OTHER
#undef P #undef P
#define P half #define P half
#define PRELU_CHANNEL prelu_channel #define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel #define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_CHANNEL #undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element #define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element #define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_ELEMENT #undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other #define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other #define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal" #include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE #undef PRELU_TYPE
#undef PRELU_OTHER #undef PRELU_OTHER
#undef P #undef P
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#pragma mark - conv bn relu
// 1x1 convolution followed by scale/shift and ReLU (float).
//
// Each thread produces one texel of output slice gid.z. For output component c
// and input slice i the float4 weight is read from
// weights[gid.z * 4 * slices + c * slices + i] and dotted with the input texel.
// The result is max(conv * new_scale + new_biase, 0).
kernel void conv_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                     texture2d_array<float, access::write> outTexture [[texture(1)]],
                                     constant MetalConvParam &param [[buffer(0)]],
                                     const device float4 *weights [[buffer(1)]],
                                     const device float4 *new_scale [[buffer(2)]],
                                     const device float4 *new_biase [[buffer(3)]],
                                     uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 1;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
    const float2 samplePos = float2(origin.x, origin.y);

    float4 acc = float4(0.0);
    for (uint slice = 0; slice < inSlices; ++slice) {
        const float4 texel = inTexture.sample(sample, samplePos, slice);
        for (uint channel = 0; channel < 4; ++channel) {
            acc[channel] += dot(texel, weights[weightBase + channel * perChannel + slice]);
        }
    }

    acc = fmax(acc * new_scale[gid.z] + new_biase[gid.z], 0.0);
    outTexture.write(acc, gid.xy, gid.z);
}
// 3x3 convolution followed by scale/shift and ReLU (float).
//
// Each thread produces one texel of output slice gid.z. For output component c,
// tap t (row-major, top-left first) and input slice i, the float4 weight is at
// weights[gid.z * 4 * 9 * slices + c * 9 * slices + t * slices + i].
// The result is max(conv * new_scale + new_biase, 0).
kernel void conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                     texture2d_array<float, access::write> outTexture [[texture(1)]],
                                     constant MetalConvParam &param [[buffer(0)]],
                                     const device float4 *weights [[buffer(1)]],
                                     const device float4 *new_scale [[buffer(2)]],
                                     const device float4 *new_biase [[buffer(3)]],
                                     uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    float4 acc = float4(0.0);
    float4 taps[9];
    for (uint slice = 0; slice < inSlices; ++slice) {
        // Gather the 3x3 neighbourhood of this input slice.
        for (int dy = -1; dy <= 1; ++dy) {
            for (int dx = -1; dx <= 1; ++dx) {
                taps[(dy + 1) * 3 + (dx + 1)] =
                    inTexture.sample(sample, float2(center.x + dx, center.y + dy), slice);
            }
        }
        // Accumulate every tap into each of the four output components.
        for (uint tap = 0; tap < tapCount; ++tap) {
            const uint tapOffset = weightBase + tap * inSlices + slice;
            for (uint channel = 0; channel < 4; ++channel) {
                acc[channel] += dot(taps[tap], weights[tapOffset + channel * perChannel]);
            }
        }
    }

    acc = fmax(acc * new_scale[gid.z] + new_biase[gid.z], 0.0);
    outTexture.write(acc, gid.xy, gid.z);
}
// Depthwise 3x3 convolution followed by scale/shift and ReLU (float).
//
// Each thread produces one texel of output slice gid.z. Only input slice gid.z
// is sampled, and each of the four components is filtered with its own nine
// scalar weights: weights[gid.z * 36 + component * 9 + tap].
// The result is max(conv * new_scale + new_biase, 0).
kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                               texture2d_array<float, access::write> outTexture [[texture(1)]],
                                               constant MetalConvParam &param [[buffer(0)]],
                                               const device float *weights [[buffer(1)]],
                                               const device float4 *new_scale [[buffer(2)]],
                                               const device float4 *new_biase [[buffer(3)]],
                                               uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint slice = gid.z;
    const uint tapCount = 9;
    const uint weightBase = slice * tapCount * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    // Gather the 3x3 neighbourhood in row-major order (top-left first).
    float4 taps[9];
    for (int dy = -1; dy <= 1; ++dy) {
        for (int dx = -1; dx <= 1; ++dx) {
            taps[(dy + 1) * 3 + (dx + 1)] =
                inTexture.sample(sample, float2(center.x + dx, center.y + dy), slice);
        }
    }

    float4 acc = float4(0.0);
    for (uint tap = 0; tap < tapCount; ++tap) {
        const float4 texel = taps[tap];
        for (uint channel = 0; channel < 4; ++channel) {
            acc[channel] += texel[channel] * weights[weightBase + channel * tapCount + tap];
        }
    }

    acc = fmax(acc * new_scale[slice] + new_biase[slice], 0.0);
    outTexture.write(acc, gid.xy, gid.z);
}
#pragma mark - half
// 1x1 convolution followed by scale/shift and ReLU (half textures and buffers).
//
// Texels and weights are stored as half but widened to float before each dot
// product; accumulation and the scale/shift/ReLU step run in float, and the
// result is narrowed to half only when written.
// Weight index: weights[gid.z * 4 * slices + component * slices + slice].
kernel void conv_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                                          texture2d_array<half, access::write> outTexture [[texture(1)]],
                                          constant MetalConvParam &param [[buffer(0)]],
                                          const device half4 *weights [[buffer(1)]],
                                          const device half4 *new_scale [[buffer(2)]],
                                          const device half4 *new_biase [[buffer(3)]],
                                          uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 1;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
    const float2 samplePos = float2(origin.x, origin.y);

    float4 acc = float4(0.0);
    for (uint slice = 0; slice < inSlices; ++slice) {
        const half4 texel = inTexture.sample(sample, samplePos, slice);
        for (uint channel = 0; channel < 4; ++channel) {
            const half4 w = weights[weightBase + channel * perChannel + slice];
            acc[channel] += dot(float4(texel), float4(w));
        }
    }

    acc = fmax(acc * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
    outTexture.write(half4(acc), gid.xy, gid.z);
}
// 3x3 convolution followed by scale/shift and ReLU (half textures and buffers).
//
// Texels and weights are stored as half but widened to float before each dot
// product; accumulation and the scale/shift/ReLU step run in float, and the
// result is narrowed to half only when written.
// Weight index:
// weights[gid.z * 4 * 9 * slices + component * 9 * slices + tap * slices + slice].
kernel void conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                                          texture2d_array<half, access::write> outTexture [[texture(1)]],
                                          constant MetalConvParam &param [[buffer(0)]],
                                          const device half4 *weights [[buffer(1)]],
                                          const device half4 *new_scale [[buffer(2)]],
                                          const device half4 *new_biase [[buffer(3)]],
                                          uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    float4 acc = float4(0.0);
    half4 taps[9];
    for (uint slice = 0; slice < inSlices; ++slice) {
        // Gather the 3x3 neighbourhood of this input slice.
        for (int dy = -1; dy <= 1; ++dy) {
            for (int dx = -1; dx <= 1; ++dx) {
                taps[(dy + 1) * 3 + (dx + 1)] =
                    inTexture.sample(sample, float2(center.x + dx, center.y + dy), slice);
            }
        }
        // Accumulate every tap into each of the four output components.
        for (uint tap = 0; tap < tapCount; ++tap) {
            const uint tapOffset = weightBase + tap * inSlices + slice;
            for (uint channel = 0; channel < 4; ++channel) {
                const half4 w = weights[tapOffset + channel * perChannel];
                acc[channel] += dot(float4(taps[tap]), float4(w));
            }
        }
    }

    acc = fmax(acc * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
    outTexture.write(half4(acc), gid.xy, gid.z);
}
// Depthwise 3x3 convolution followed by scale/shift and ReLU (half textures and buffers).
//
// Each thread produces one texel of output slice gid.z. Only input slice gid.z
// is sampled, and each of the four components is filtered with its own nine
// scalar weights: weights[gid.z * 36 + component * 9 + tap].
//
// Both the texel component and the weight are widened to float BEFORE they are
// multiplied. Multiplying the two half values directly would round (or
// overflow) the product in half precision before it reaches the float
// accumulator; widening first matches depthwise_conv_3x3_half and the other
// half kernels in this file.
// The result is max(conv * new_scale + new_biase, 0), narrowed to half on write.
kernel void depthwise_conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                                                    texture2d_array<half, access::write> outTexture [[texture(1)]],
                                                    constant MetalConvParam &param [[buffer(0)]],
                                                    const device half *weights [[buffer(1)]],
                                                    const device half4 *new_scale [[buffer(2)]],
                                                    const device half4 *new_biase [[buffer(3)]],
                                                    uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint output_slice = gid.z;
    const uint kernelHXW = 9;
    const uint weightBase = gid.z * kernelHXW * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    // Gather the 3x3 neighbourhood in row-major order (top-left first).
    half4 inputs[9];
    for (int dy = -1; dy <= 1; ++dy) {
        for (int dx = -1; dx <= 1; ++dx) {
            inputs[(dy + 1) * 3 + (dx + 1)] =
                inTexture.sample(sample, float2(posInInput.x + dx, posInInput.y + dy), output_slice);
        }
    }

    float4 output = float4(0.0);
    for (uint j = 0; j < kernelHXW; ++j) {
        const float4 input = float4(inputs[j]);  // widen once per tap
        output.x += input.x * float(weights[weightBase + 0 * kernelHXW + j]);
        output.y += input.y * float(weights[weightBase + 1 * kernelHXW + j]);
        output.z += input.z * float(weights[weightBase + 2 * kernelHXW + j]);
        output.w += input.w * float(weights[weightBase + 3 * kernelHXW + j]);
    }

    output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
    outTexture.write(half4(output), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// conv
#pragma mark -- conv
// Plain 3x3 convolution (float), no bias or activation.
//
// Each thread produces one texel of output slice gid.z. For output component c,
// tap t (row-major, top-left first) and input slice i, the float4 weight is at
// weights[gid.z * 4 * 9 * slices + c * 9 * slices + t * slices + i].
kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                     texture2d_array<float, access::write> outTexture [[texture(1)]],
                     constant MetalConvParam &param [[buffer(0)]],
                     const device float4 *weights [[buffer(1)]],
                     uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    float4 acc = float4(0.0);
    float4 taps[9];
    for (uint slice = 0; slice < inSlices; ++slice) {
        // Gather the 3x3 neighbourhood of this input slice.
        for (int dy = -1; dy <= 1; ++dy) {
            for (int dx = -1; dx <= 1; ++dx) {
                taps[(dy + 1) * 3 + (dx + 1)] =
                    inTexture.sample(sample, float2(center.x + dx, center.y + dy), slice);
            }
        }
        // Accumulate every tap into each of the four output components.
        for (uint tap = 0; tap < tapCount; ++tap) {
            const uint tapOffset = weightBase + tap * inSlices + slice;
            for (uint channel = 0; channel < 4; ++channel) {
                acc[channel] += dot(taps[tap], weights[tapOffset + channel * perChannel]);
            }
        }
    }

    outTexture.write(acc, gid.xy, gid.z);
}
// Plain depthwise 3x3 convolution (float), no bias or activation.
//
// Each thread produces one texel of output slice gid.z. Only input slice gid.z
// is sampled, and each of the four components is filtered with its own nine
// scalar weights: weights[gid.z * 36 + component * 9 + tap].
kernel void depthwise_conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                               texture2d_array<float, access::write> outTexture [[texture(1)]],
                               constant MetalConvParam &param [[buffer(0)]],
                               const device float *weights [[buffer(1)]],
                               uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint slice = gid.z;
    const uint tapCount = 9;
    const uint weightBase = slice * tapCount * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    // Gather the 3x3 neighbourhood in row-major order (top-left first).
    float4 taps[9];
    for (int dy = -1; dy <= 1; ++dy) {
        for (int dx = -1; dx <= 1; ++dx) {
            taps[(dy + 1) * 3 + (dx + 1)] =
                inTexture.sample(sample, float2(center.x + dx, center.y + dy), slice);
        }
    }

    float4 acc = float4(0.0);
    for (uint tap = 0; tap < tapCount; ++tap) {
        const float4 texel = taps[tap];
        for (uint channel = 0; channel < 4; ++channel) {
            acc[channel] += texel[channel] * weights[weightBase + channel * tapCount + tap];
        }
    }

    outTexture.write(acc, gid.xy, gid.z);
}
// Plain 1x1 convolution (float), no bias or activation.
//
// Each thread produces one texel of output slice gid.z. For output component c
// and input slice i the float4 weight is read from
// weights[gid.z * 4 * slices + c * slices + i] and dotted with the input texel.
kernel void conv_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                     texture2d_array<float, access::write> outTexture [[texture(1)]],
                     constant MetalConvParam &param [[buffer(0)]],
                     const device float4 *weights [[buffer(1)]],
                     uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 1;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
    const float2 samplePos = float2(origin.x, origin.y);

    float4 acc = float4(0.0);
    for (uint slice = 0; slice < inSlices; ++slice) {
        const float4 texel = inTexture.sample(sample, samplePos, slice);
        for (uint channel = 0; channel < 4; ++channel) {
            acc[channel] += dot(texel, weights[weightBase + channel * perChannel + slice]);
        }
    }

    outTexture.write(acc, gid.xy, gid.z);
}
// Plain 3x3 convolution (half textures and weights), no bias or activation.
//
// Texels and weights are widened to float before each dot product and the
// accumulator is float; the result is narrowed to half only when written.
// Weight index:
// weights[gid.z * 4 * 9 * slices + component * 9 * slices + tap * slices + slice].
kernel void conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                          texture2d_array<half, access::write> outTexture [[texture(1)]],
                          constant MetalConvParam &param [[buffer(0)]],
                          const device half4 *weights [[buffer(1)]],
                          uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    float4 acc = float4(0.0);
    half4 taps[9];
    for (uint slice = 0; slice < inSlices; ++slice) {
        // Gather the 3x3 neighbourhood of this input slice.
        for (int dy = -1; dy <= 1; ++dy) {
            for (int dx = -1; dx <= 1; ++dx) {
                taps[(dy + 1) * 3 + (dx + 1)] =
                    inTexture.sample(sample, float2(center.x + dx, center.y + dy), slice);
            }
        }
        // Accumulate every tap into each of the four output components.
        for (uint tap = 0; tap < tapCount; ++tap) {
            const uint tapOffset = weightBase + tap * inSlices + slice;
            for (uint channel = 0; channel < 4; ++channel) {
                const half4 w = weights[tapOffset + channel * perChannel];
                acc[channel] += dot(float4(taps[tap]), float4(w));
            }
        }
    }

    outTexture.write(half4(acc), gid.xy, gid.z);
}
// Plain depthwise 3x3 convolution (half textures and weights), no bias or activation.
//
// Each thread produces one texel of output slice gid.z. Only input slice gid.z
// is sampled, and each of the four components is filtered with its own nine
// scalar weights: weights[gid.z * 36 + component * 9 + tap].
// Texel component and weight are both widened to float before multiplying.
kernel void depthwise_conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                                    texture2d_array<half, access::write> outTexture [[texture(1)]],
                                    constant MetalConvParam &param [[buffer(0)]],
                                    const device half *weights [[buffer(1)]],
                                    uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint slice = gid.z;
    const uint tapCount = 9;
    const uint weightBase = slice * tapCount * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);

    // Gather the 3x3 neighbourhood in row-major order (top-left first).
    half4 taps[9];
    for (int dy = -1; dy <= 1; ++dy) {
        for (int dx = -1; dx <= 1; ++dx) {
            taps[(dy + 1) * 3 + (dx + 1)] =
                inTexture.sample(sample, float2(center.x + dx, center.y + dy), slice);
        }
    }

    float4 acc = float4(0.0);
    for (uint tap = 0; tap < tapCount; ++tap) {
        const half4 texel = taps[tap];
        for (uint channel = 0; channel < 4; ++channel) {
            acc[channel] += float(texel[channel]) * float(weights[weightBase + channel * tapCount + tap]);
        }
    }

    outTexture.write(half4(acc), gid.xy, gid.z);
}
// Plain 1x1 convolution (half textures and weights), no bias or activation.
//
// Texels and weights are widened to float before each dot product and the
// accumulator is float; the result is narrowed to half only when written.
// Weight index: weights[gid.z * 4 * slices + component * slices + slice].
kernel void conv_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                          texture2d_array<half, access::write> outTexture [[texture(1)]],
                          constant MetalConvParam &param [[buffer(0)]],
                          const device half4 *weights [[buffer(1)]],
                          uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 1;
    const uint inSlices = inTexture.get_array_size();
    const uint perChannel = tapCount * inSlices;      // weights per output component
    const uint weightBase = gid.z * perChannel * 4;

    const ushort2 stride = ushort2(param.strideX, param.strideY);
    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
    const float2 samplePos = float2(origin.x, origin.y);

    float4 acc = float4(0.0);
    for (uint slice = 0; slice < inSlices; ++slice) {
        const half4 texel = inTexture.sample(sample, samplePos, slice);
        for (uint channel = 0; channel < 4; ++channel) {
            const half4 w = weights[weightBase + channel * perChannel + slice];
            acc[channel] += dot(float4(texel), float4(w));
        }
    }

    outTexture.write(half4(acc), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// Parameters read by the elementwise_add kernels through `constant` buffer(0).
// NOTE(review): field order and sizes presumably have to match the host-side
// struct that fills this buffer; confirm before changing the layout.
struct ElementwiseAddParam {
// When equal to 1 the kernels read X and Y at the same texel coordinate and
// skip all index remapping.
int32_t fast;
// First index k of the remapped X position that is copied into the Y position
// on the non-fast path (the copy loop runs k = axis .. axis + ylen - 1).
int32_t axis;
// Number of consecutive indices copied starting at `axis`; also used to compute
// the destination shift 4 - ylen - axis.
int32_t ylen;
// The kernels visible here only read xdim[3], which is passed to xyzn2abcd().
// NOTE(review): presumably the 4-D shape of X; confirm against the host code.
int32_t xdim[4];
// Copied into a local array and passed to invtrans() for the X position.
int32_t xtrans[4];
// The kernels visible here only read ydim[3], which is passed to abcd2xyzn().
// NOTE(review): presumably the 4-D shape of Y; confirm against the host code.
int32_t ydim[4];
// Copied into a local array and passed to trans() for the Y position.
int32_t ytrans[4];
};
// Element-wise X + Y on float texture arrays.
//
// X is always read at the thread's own coordinate. When pm.fast == 1, Y is read
// at that same coordinate. Otherwise each of the four components of the X texel
// is mapped to a Y location through xyzn2abcd / invtrans / trans / abcd2xyzn
// (helpers defined outside this block) and that single Y component is fetched.
// NOTE(review): the remapping presumably implements broadcasting of Y along
// pm.axis; confirm against the helper definitions.
kernel void elementwise_add(texture2d_array<float, access::read> inputX [[texture(0)]],
                            texture2d_array<float, access::read> inputY [[texture(1)]],
                            texture2d_array<float, access::write> outTexture [[texture(2)]],
                            constant ElementwiseAddParam &pm [[buffer(0)]],
                            uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Both paths take X from the thread's own texel.
    const float4 rx = inputX.read(gid.xy, gid.z);
    float4 ry;

    if (pm.fast == 1) {
        ry = inputY.read(gid.xy, gid.z);
    } else {
        int32_t srcXyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0};
        int32_t srcAbcd[4];
        int32_t tmpAbcd[4];
        int32_t dstAbcd[4] = {0, 0, 0, 0};
        int32_t dstXyzn[4];

        // Local copies: the helpers take plain (thread-space) arrays.
        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};

        const int32_t shift = 4 - pm.ylen - pm.axis;
        const int32_t axisEnd = pm.axis + pm.ylen;

        for (int lane = 0; lane < 4; ++lane) {
            srcXyzn[3] = lane;
            xyzn2abcd(pm.xdim[3], srcXyzn, srcAbcd);
            invtrans(xtrans, srcAbcd, tmpAbcd);
            // Copy indices [axis, axis + ylen) into the last ylen slots of dstAbcd.
            for (int k = pm.axis; k < axisEnd; ++k) {
                dstAbcd[shift + k] = tmpAbcd[k];
            }
            trans(ytrans, dstAbcd, tmpAbcd);
            abcd2xyzn(pm.ydim[3], tmpAbcd, dstXyzn);
            ry[lane] = inputY.read(uint2(dstXyzn[0], dstXyzn[1]), dstXyzn[2])[dstXyzn[3]];
        }
    }

    outTexture.write(rx + ry, gid.xy, gid.z);
}
// Element-wise X + Y on half texture arrays.
//
// X is always read at the thread's own coordinate. When pm.fast == 1, Y is read
// at that same coordinate. Otherwise each of the four components of the X texel
// is mapped to a Y location through xyzn2abcd / invtrans / trans / abcd2xyzn
// (helpers defined outside this block) and that single Y component is fetched.
// NOTE(review): the remapping presumably implements broadcasting of Y along
// pm.axis; confirm against the helper definitions.
kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[texture(0)]],
                                 texture2d_array<half, access::read> inputY [[texture(1)]],
                                 texture2d_array<half, access::write> outTexture [[texture(2)]],
                                 constant ElementwiseAddParam &pm [[buffer(0)]],
                                 uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Both paths take X from the thread's own texel.
    const half4 rx = inputX.read(gid.xy, gid.z);
    half4 ry;

    if (pm.fast == 1) {
        ry = inputY.read(gid.xy, gid.z);
    } else {
        int32_t srcXyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0};
        int32_t srcAbcd[4];
        int32_t tmpAbcd[4];
        int32_t dstAbcd[4] = {0, 0, 0, 0};
        int32_t dstXyzn[4];

        // Local copies: the helpers take plain (thread-space) arrays.
        int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
        int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};

        const int32_t shift = 4 - pm.ylen - pm.axis;
        const int32_t axisEnd = pm.axis + pm.ylen;

        for (int lane = 0; lane < 4; ++lane) {
            srcXyzn[3] = lane;
            xyzn2abcd(pm.xdim[3], srcXyzn, srcAbcd);
            invtrans(xtrans, srcAbcd, tmpAbcd);
            // Copy indices [axis, axis + ylen) into the last ylen slots of dstAbcd.
            for (int k = pm.axis; k < axisEnd; ++k) {
                dstAbcd[shift + k] = tmpAbcd[k];
            }
            trans(ytrans, dstAbcd, tmpAbcd);
            abcd2xyzn(pm.ydim[3], tmpAbcd, dstXyzn);
            ry[lane] = inputY.read(uint2(dstXyzn[0], dstXyzn[1]), dstXyzn[2])[dstXyzn[3]];
        }
    }

    const half4 sum = rx + ry;
    outTexture.write(sum, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
// Template-by-macro kernel: element-wise add of two textures followed by an
// optional PReLU activation. The including file is expected to define P (the
// scalar type), PRELU_TYPE (used to build the kernel name through FUNC3_) and
// at most one of PRELU_CHANNEL / PRELU_ELEMENT / PRELU_OTHER. FUNC3_ and VECTOR
// are macros defined outside this chunk (presumably in Macro.metal).
//
// inputX     [[texture(0)]]  left operand, read at this thread's own texel.
// inputY     [[texture(1)]]  right operand.
// outTexture [[texture(2)]]  destination; also defines the valid thread range.
// pm         [[buffer(0)]]   ElementwiseAddParam (declared elsewhere).
// alpha      [[buffer(1)]]   PReLU slopes; only present when a PRELU_* macro is set.
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
texture2d_array<P, access::read> inputY [[texture(1)]],
texture2d_array<P, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(1)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
// Threads outside the output texture do nothing.
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
VECTOR(P, 4) rx, ry;
if (pm.fast == 1) {
// Fast path: both operands are read at the same texel.
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
// General path: each of the four lanes of Y is located separately through
// the coordinate helpers (xyzn2abcd, invtrans, trans, abcd2xyzn), which are
// defined outside this chunk.
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
// Slot 3 of x_xyzn selects the lane inside the texel.
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
// Copy the pm.ylen indices starting at pm.axis into the tail of y_abcd;
// entries that are not overwritten keep their initial 0.
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
VECTOR(P, 4) output = rx + ry;
#ifdef PRELU_CHANNEL
// One 4-lane slope vector per array slice.
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
// One slope vector per (x, y, slice): slices of the same pixel are adjacent.
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
// A single scalar slope shared by every element.
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(output, gid.xy, gid.z);
}
#endif
...@@ -58,7 +58,7 @@ kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[text ...@@ -58,7 +58,7 @@ kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[text
} }
int input_width = inTexture.get_width(); int input_width = inTexture.get_width();
// int input_height = inTexture.get_height(); // int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z); const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input; output[gid.y * input_width + gid.x] = input;
} }
...@@ -73,7 +73,7 @@ kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[ ...@@ -73,7 +73,7 @@ kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[
} }
int input_width = inTexture.get_width(); int input_width = inTexture.get_width();
// int input_height = inTexture.get_height(); // int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z); const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = float4(input); output[gid.y * input_width + gid.x] = float4(input);
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// PReLU with one 4-lane slope vector per array slice (float textures).
//
// inTexture  [[texture(0)]]  source, sampled at the thread's pixel coordinate.
// outTexture [[texture(1)]]  destination; also defines the valid thread range.
// alpha      [[buffer(0)]]   slope vectors, indexed by gid.z.
//
// Positive lanes pass through; other lanes are multiplied by their slope.
kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                          texture2d_array<float, access::write> outTexture [[texture(1)]],
                          const device float4 *alpha [[buffer(0)]],
                          uint3 gid [[thread_position_in_grid]]){
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Unnormalised pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler texelSampler(coord::pixel, filter::nearest, address::clamp_to_zero);
    const float4 value = inTexture.sample(texelSampler, float2(gid.x, gid.y), gid.z);
    const float4 slope = alpha[gid.z];

    float4 activated;
    for (int lane = 0; lane < 4; ++lane) {
        if (value[lane] > 0) {
            activated[lane] = value[lane];
        } else {
            activated[lane] = slope[lane] * value[lane];
        }
    }
    outTexture.write(activated, gid.xy, gid.z);
}
// PReLU with an individual 4-lane slope vector for every texel (float textures).
//
// inTexture  [[texture(0)]]  source, sampled at the thread's pixel coordinate.
// outTexture [[texture(1)]]  destination; also defines the valid thread range.
// alpha      [[buffer(0)]]   slope vectors; the slices of one pixel are adjacent,
//                            pixels are laid out row by row.
kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                          texture2d_array<float, access::write> outTexture [[texture(1)]],
                          const device float4 *alpha [[buffer(0)]],
                          uint3 gid [[thread_position_in_grid]]){
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Unnormalised pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler texelSampler(coord::pixel, filter::nearest, address::clamp_to_zero);
    const float4 value = inTexture.sample(texelSampler, float2(gid.x, gid.y), gid.z);

    // Offset of this pixel's first slice inside the slope buffer.
    int pixelBase = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
    const float4 slope = alpha[pixelBase + gid.z];

    float4 activated;
    for (int lane = 0; lane < 4; ++lane) {
        if (value[lane] > 0) {
            activated[lane] = value[lane];
        } else {
            activated[lane] = slope[lane] * value[lane];
        }
    }
    outTexture.write(activated, gid.xy, gid.z);
}
// PReLU with a single scalar slope shared by every element (float textures).
//
// inTexture  [[texture(0)]]  source, sampled at the thread's pixel coordinate.
// outTexture [[texture(1)]]  destination; also defines the valid thread range.
// alpha      [[buffer(0)]]   only alpha[0] is read.
kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                        texture2d_array<float, access::write> outTexture [[texture(1)]],
                        const device float *alpha [[buffer(0)]],
                        uint3 gid [[thread_position_in_grid]]){
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Unnormalised pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler texelSampler(coord::pixel, filter::nearest, address::clamp_to_zero);
    const float4 value = inTexture.sample(texelSampler, float2(gid.x, gid.y), gid.z);
    const float slope = alpha[0];

    float4 activated;
    for (int lane = 0; lane < 4; ++lane) {
        if (value[lane] > 0) {
            activated[lane] = value[lane];
        } else {
            activated[lane] = slope * value[lane];
        }
    }
    outTexture.write(activated, gid.xy, gid.z);
}
// PReLU with one 4-lane slope vector per array slice (half textures).
//
// inTexture  [[texture(0)]]  source, sampled at the thread's pixel coordinate.
// outTexture [[texture(1)]]  destination; also defines the valid thread range.
// alpha      [[buffer(0)]]   slope vectors, indexed by gid.z.
//
// Positive lanes pass through; other lanes are multiplied by their slope.
kernel void prelu_channel_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                               texture2d_array<half, access::write> outTexture [[texture(1)]],
                               const device half4 *alpha [[buffer(0)]],
                               uint3 gid [[thread_position_in_grid]]){
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Unnormalised pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler texelSampler(coord::pixel, filter::nearest, address::clamp_to_zero);
    const half4 value = inTexture.sample(texelSampler, float2(gid.x, gid.y), gid.z);
    const half4 slope = alpha[gid.z];

    half4 activated;
    for (int lane = 0; lane < 4; ++lane) {
        if (value[lane] > 0) {
            activated[lane] = value[lane];
        } else {
            activated[lane] = slope[lane] * value[lane];
        }
    }
    outTexture.write(activated, gid.xy, gid.z);
}
// PReLU with an individual 4-lane slope vector for every texel (half textures).
//
// inTexture  [[texture(0)]]  source, sampled at the thread's pixel coordinate.
// outTexture [[texture(1)]]  destination; also defines the valid thread range.
// alpha      [[buffer(0)]]   slope vectors; the slices of one pixel are adjacent,
//                            pixels are laid out row by row.
kernel void prelu_element_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                               texture2d_array<half, access::write> outTexture [[texture(1)]],
                               const device half4 *alpha [[buffer(0)]],
                               uint3 gid [[thread_position_in_grid]]){
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Unnormalised pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler texelSampler(coord::pixel, filter::nearest, address::clamp_to_zero);
    const half4 value = inTexture.sample(texelSampler, float2(gid.x, gid.y), gid.z);

    // Offset of this pixel's first slice inside the slope buffer.
    int pixelBase = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
    const half4 slope = alpha[pixelBase + gid.z];

    half4 activated;
    for (int lane = 0; lane < 4; ++lane) {
        if (value[lane] > 0) {
            activated[lane] = value[lane];
        } else {
            activated[lane] = slope[lane] * value[lane];
        }
    }
    outTexture.write(activated, gid.xy, gid.z);
}
// PReLU with a single scalar slope shared by every element (half textures).
//
// inTexture  [[texture(0)]]  source, sampled at the thread's pixel coordinate.
// outTexture [[texture(1)]]  destination; also defines the valid thread range.
// alpha      [[buffer(0)]]   only alpha[0] is read.
kernel void prelu_other_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                             texture2d_array<half, access::write> outTexture [[texture(1)]],
                             const device half *alpha [[buffer(0)]],
                             uint3 gid [[thread_position_in_grid]]){
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    // Unnormalised pixel coordinates, nearest texel, zero outside the texture.
    constexpr sampler texelSampler(coord::pixel, filter::nearest, address::clamp_to_zero);
    const half4 value = inTexture.sample(texelSampler, float2(gid.x, gid.y), gid.z);
    const half slope = alpha[0];

    half4 activated;
    for (int lane = 0; lane < 4; ++lane) {
        if (value[lane] > 0) {
            activated[lane] = value[lane];
        } else {
            activated[lane] = slope * value[lane];
        }
    }
    outTexture.write(activated, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// Parameters shared by the prior_box* kernels, bound at [[buffer(1)]].
// NOTE(review): field order and types presumably have to match the host-side
// struct that fills this buffer -- confirm before reordering anything.
struct PriorBoxMetalParam {
// Added to gid.x / gid.y before scaling by the step to get the box centre.
float offset;
// Multiplier applied to (gid.x + offset) to get the centre x.
float stepWidth;
// Multiplier applied to (gid.y + offset) to get the centre y.
float stepHeight;
// Base box size; scaled by sqrt(aspect ratio) for ratio boxes.
float minSize;
// Combined with minSize as sqrt(minSize * maxSize) for the "max" box.
float maxSize;
// Divisor that normalises x coordinates.
float imageWidth;
// Divisor that normalises y coordinates.
float imageHeight;
// When true, box coordinates are clamped to [0, 1].
bool clip;
// Variances are written only for array slices with gid.z < numPriors.
uint numPriors;
// Upper bound the kernels apply to indices into aspect_ratios.
uint aspecRatiosSize;
// Not read by the kernels visible in this file section.
uint minSizeSize;
// The "max" box is produced only when this is greater than zero.
uint maxSizeSize;
};
// Generates prior boxes (float output): one box per array slice for every
// (gid.x, gid.y) cell, plus the matching variance texel.
//
// inTexture       [[texture(0)]]  bound but not read by this kernel.
// outBoxTexture   [[texture(1)]]  receives (xmin, ymin, xmax, ymax), normalised
//                                 by the image size; defines the thread range.
// varianceTexture [[texture(2)]]  receives variances[0] for slices < numPriors.
// aspect_ratios   [[buffer(0)]]   one ratio per slice below aspecRatiosSize.
// param           [[buffer(1)]]   see PriorBoxMetalParam.
// variances       [[buffer(2)]]   only element 0 is read.
//
// Slices below aspecRatiosSize get a minSize box stretched by their aspect
// ratio; the remaining slices get the sqrt(minSize * maxSize) square box when
// maxSizeSize > 0 and are otherwise left unwritten.
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
                      texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
                      texture2d_array<float, access::write> varianceTexture [[texture(2)]],
                      const device float *aspect_ratios [[buffer(0)]],
                      constant PriorBoxMetalParam &param [[buffer(1)]],
                      const device float4 *variances [[buffer(2)]],
                      uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outBoxTexture.get_width() ||
        gid.y >= outBoxTexture.get_height() ||
        gid.z >= outBoxTexture.get_array_size()) return;

    float center_x = (gid.x + param.offset) * param.stepWidth;
    float center_y = (gid.y + param.offset) * param.stepHeight;
    float box_width, box_height;

    if (gid.z < param.aspecRatiosSize) {
        float ar = aspect_ratios[gid.z];
        // Half extents of a minSize box stretched by the aspect ratio.
        box_width = param.minSize * sqrt(ar) / 2;
        box_height = param.minSize / sqrt(ar) / 2;

        float4 box;
        box.x = (center_x - box_width) / param.imageWidth;
        box.y = (center_y - box_height) / param.imageHeight;
        box.z = (center_x + box_width) / param.imageWidth;
        box.w = (center_y + box_height) / param.imageHeight;

        float4 res;
        if (param.clip) {
            res = fmin(fmax(box, 0.0), 1.0);
        } else {
            res = box;
        }
        outBoxTexture.write(res, gid.xy, gid.z);
    } else if (param.maxSizeSize > 0) {
        // Reaching this branch already implies gid.z >= aspecRatiosSize.
        box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;

        float4 max_box;
        max_box.x = (center_x - box_width) / param.imageWidth;
        max_box.y = (center_y - box_height) / param.imageHeight;
        max_box.z = (center_x + box_width) / param.imageWidth;
        max_box.w = (center_y + box_height) / param.imageHeight;

        float4 res;
        if (param.clip) {
            res = min(max(max_box, 0.0), 1.0);
        } else {
            res = max_box;
        }
        // Write the (possibly clipped) result. The previous code wrote max_box
        // here, so param.clip had no effect on this box.
        outBoxTexture.write(res, gid.xy, gid.z);
    }

    if (gid.z < param.numPriors) {
        // Every prior shares the same variance vector.
        varianceTexture.write(variances[0], gid.xy, gid.z);
    }
}
// Generates prior boxes (half output): one box per array slice for every
// (gid.x, gid.y) cell, plus the matching variance texel. Box arithmetic is done
// in float and converted to half on write.
//
// inTexture       [[texture(0)]]  bound but not read by this kernel.
// outBoxTexture   [[texture(1)]]  receives (xmin, ymin, xmax, ymax), normalised
//                                 by the image size; defines the thread range.
// varianceTexture [[texture(2)]]  receives variances[0] for slices < numPriors.
// aspect_ratios   [[buffer(0)]]   one half-precision ratio per slice below
//                                 aspecRatiosSize.
// param           [[buffer(1)]]   see PriorBoxMetalParam.
// variances       [[buffer(2)]]   only element 0 is read.
//
// Slices below aspecRatiosSize get a minSize box stretched by their aspect
// ratio; the remaining slices get the sqrt(minSize * maxSize) square box when
// maxSizeSize > 0 and are otherwise left unwritten.
kernel void prior_box_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
                           texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
                           texture2d_array<half, access::write> varianceTexture [[texture(2)]],
                           const device half *aspect_ratios [[buffer(0)]],
                           constant PriorBoxMetalParam &param [[buffer(1)]],
                           const device float4 *variances [[buffer(2)]],
                           uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outBoxTexture.get_width() ||
        gid.y >= outBoxTexture.get_height() ||
        gid.z >= outBoxTexture.get_array_size()) return;

    float center_x = (gid.x + param.offset) * param.stepWidth;
    float center_y = (gid.y + param.offset) * param.stepHeight;
    float box_width, box_height;

    if (gid.z < param.aspecRatiosSize) {
        half ar = aspect_ratios[gid.z];
        // Half extents of a minSize box stretched by the aspect ratio.
        box_width = param.minSize * sqrt(ar) / 2;
        box_height = param.minSize / sqrt(ar) / 2;

        float4 box;
        box.x = (center_x - box_width) / param.imageWidth;
        box.y = (center_y - box_height) / param.imageHeight;
        box.z = (center_x + box_width) / param.imageWidth;
        box.w = (center_y + box_height) / param.imageHeight;

        float4 res;
        if (param.clip) {
            res = fmin(fmax(box, 0.0), 1.0);
        } else {
            res = box;
        }
        outBoxTexture.write(half4(res), gid.xy, gid.z);
    } else if (param.maxSizeSize > 0) {
        // Reaching this branch already implies gid.z >= aspecRatiosSize.
        box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;

        float4 max_box;
        max_box.x = (center_x - box_width) / param.imageWidth;
        max_box.y = (center_y - box_height) / param.imageHeight;
        max_box.z = (center_x + box_width) / param.imageWidth;
        max_box.w = (center_y + box_height) / param.imageHeight;

        float4 res;
        if (param.clip) {
            res = min(max(max_box, 0.0), 1.0);
        } else {
            res = max_box;
        }
        // Write the (possibly clipped) result. The previous code wrote max_box
        // here, so param.clip had no effect on this box.
        outBoxTexture.write(half4(res), gid.xy, gid.z);
    }

    if (gid.z < param.numPriors) {
        // Every prior shares the same variance vector.
        varianceTexture.write(half4(variances[0]), gid.xy, gid.z);
    }
}
// Generates prior boxes (float output) in "min, max, aspect ratios" slice order.
//
// inTexture       [[texture(0)]]  bound but not read by this kernel.
// outBoxTexture   [[texture(1)]]  receives (xmin, ymin, xmax, ymax), normalised
//                                 by the image size; defines the thread range.
// varianceTexture [[texture(2)]]  receives variances[0] for slices < numPriors.
// aspect_ratios   [[buffer(0)]]   ratio list; entries equal to 1 are skipped.
// param           [[buffer(1)]]   see PriorBoxMetalParam.
// variances       [[buffer(2)]]   only element 0 is read.
//
// Slice 0 holds the minSize square, slice 1 the sqrt(minSize * maxSize) square
// (only when maxSizeSize > 0), and the following slices hold one box per aspect
// ratio that differs from 1.
kernel void prior_box_MinMaxAspectRatiosOrder(texture2d_array<float, access::read> inTexture [[texture(0)]],
                                              texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
                                              texture2d_array<float, access::write> varianceTexture [[texture(2)]],
                                              const device float *aspect_ratios [[buffer(0)]],
                                              constant PriorBoxMetalParam &param [[buffer(1)]],
                                              const device float4 *variances [[buffer(2)]],
                                              uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outBoxTexture.get_width() &&
                              gid.y < outBoxTexture.get_height() &&
                              gid.z < outBoxTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const float cx = (gid.x + param.offset) * param.stepWidth;
    const float cy = (gid.y + param.offset) * param.stepHeight;

    if (gid.z == 0) {
        // Square box with side minSize.
        const float half_side = param.minSize / 2;
        const float4 corners = float4((cx - half_side) / param.imageWidth,
                                      (cy - half_side) / param.imageHeight,
                                      (cx + half_side) / param.imageWidth,
                                      (cy + half_side) / param.imageHeight);
        const float4 stored = param.clip ? fmin(fmax(corners, 0.0), 1.0) : corners;
        outBoxTexture.write(stored, gid.xy, gid.z);
    }

    if (gid.z == 1 && param.maxSizeSize > 0) {
        // Square box with side sqrt(minSize * maxSize).
        const float half_side = sqrt(param.minSize * param.maxSize) / 2;
        const float4 corners = float4((cx - half_side) / param.imageWidth,
                                      (cy - half_side) / param.imageHeight,
                                      (cx + half_side) / param.imageWidth,
                                      (cy + half_side) / param.imageHeight);
        const float4 stored = param.clip ? min(max(corners, 0.0), 1.0) : corners;
        outBoxTexture.write(stored, gid.xy, gid.z);
    }

    // Position of this slice among the ratio boxes; negative for the min/max
    // slices (the unsigned subtraction wraps and is converted back to int).
    int ratio_slot = gid.z - (param.maxSizeSize > 0 ? 2 : 1);

    if (ratio_slot >= 0 && ratio_slot < int(param.aspecRatiosSize)) {
        // Ratios equal to 1 produce no box of their own, so step past every one
        // found among the first ratio_slot + 1 entries.
        int ones_seen = 0;
        for (int i = 0; i <= ratio_slot; ++i) {
            if (fabs(aspect_ratios[i] - 1.) < 1e-6) {
                ones_seen += 1;
            }
        }
        ratio_slot += ones_seen;

        const float ar = aspect_ratios[ratio_slot];
        const float half_w = param.minSize * sqrt(ar) / 2;
        const float half_h = param.minSize / sqrt(ar) / 2;
        const float4 corners = float4((cx - half_w) / param.imageWidth,
                                      (cy - half_h) / param.imageHeight,
                                      (cx + half_w) / param.imageWidth,
                                      (cy + half_h) / param.imageHeight);
        const float4 stored = param.clip ? fmin(fmax(corners, 0.0), 1.0) : corners;
        outBoxTexture.write(stored, gid.xy, gid.z);
    }

    // Every prior shares the same variance vector.
    const float4 shared_variance = variances[0];
    if (gid.z < param.numPriors) {
        varianceTexture.write(shared_variance, gid.xy, gid.z);
    }
}
// Generates prior boxes (half output) in "min, max, aspect ratios" slice order.
// Box arithmetic is done in float and converted to half on write.
//
// inTexture       [[texture(0)]]  bound but not read by this kernel.
// outBoxTexture   [[texture(1)]]  receives (xmin, ymin, xmax, ymax), normalised
//                                 by the image size; defines the thread range.
// varianceTexture [[texture(2)]]  receives variances[0] for slices < numPriors.
// aspect_ratios   [[buffer(0)]]   ratio list; entries equal to 1 are skipped.
// param           [[buffer(1)]]   see PriorBoxMetalParam.
// variances       [[buffer(2)]]   only element 0 is read.
//
// Slice 0 holds the minSize square, slice 1 the sqrt(minSize * maxSize) square
// (only when maxSizeSize > 0), and the following slices hold one box per aspect
// ratio that differs from 1.
kernel void prior_box_MinMaxAspectRatiosOrder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
                                                   texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
                                                   texture2d_array<half, access::write> varianceTexture [[texture(2)]],
                                                   const device half *aspect_ratios [[buffer(0)]],
                                                   constant PriorBoxMetalParam &param [[buffer(1)]],
                                                   const device float4 *variances [[buffer(2)]],
                                                   uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outBoxTexture.get_width() ||
        gid.y >= outBoxTexture.get_height() ||
        gid.z >= outBoxTexture.get_array_size()) return;

    float center_x = (gid.x + param.offset) * param.stepWidth;
    float center_y = (gid.y + param.offset) * param.stepHeight;
    float box_width, box_height;

    if (gid.z == 0) {
        // Square box with side minSize.
        box_width = box_height = param.minSize / 2;

        float4 box;
        box.x = (center_x - box_width) / param.imageWidth;
        box.y = (center_y - box_height) / param.imageHeight;
        box.z = (center_x + box_width) / param.imageWidth;
        box.w = (center_y + box_height) / param.imageHeight;

        float4 res;
        if (param.clip) {
            res = fmin(fmax(box, 0.0), 1.0);
        } else {
            res = box;
        }
        outBoxTexture.write(half4(res), gid.xy, gid.z);
    }

    if (gid.z == 1 && param.maxSizeSize > 0) {
        // Square box with side sqrt(minSize * maxSize).
        box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;

        float4 max_box;
        max_box.x = (center_x - box_width) / param.imageWidth;
        max_box.y = (center_y - box_height) / param.imageHeight;
        max_box.z = (center_x + box_width) / param.imageWidth;
        max_box.w = (center_y + box_height) / param.imageHeight;

        float4 res;
        if (param.clip) {
            res = min(max(max_box, 0.0), 1.0);
        } else {
            res = max_box;
        }
        outBoxTexture.write(half4(res), gid.xy, gid.z);
    }

    // Position of this slice among the ratio boxes; negative for the min/max
    // slices (the unsigned subtraction wraps and is converted back to int).
    int aspect_to = 0;
    if (param.maxSizeSize > 0) {
        aspect_to = gid.z - 2;
    } else {
        aspect_to = gid.z - 1;
    }

    // Same ratio selection as the float kernel prior_box_MinMaxAspectRatiosOrder:
    // ratios equal to 1 produce no box of their own, so the slice index is
    // advanced past every such entry seen so far. The previous condition
    // (aspect_to > 0 and ratio != 1) never wrote the first ratio slice and
    // mapped later slices to the wrong ratio.
    // NOTE(review): like the float kernel, this assumes the buffer holds an
    // entry at aspect_to + skip -- confirm against the host-side dispatch.
    if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) {
        int skip = 0;
        for (int i = 0; i < aspect_to + 1; ++i) {
            if (fabs(aspect_ratios[i] - 1.) < 1e-6) {
                skip += 1;
            }
        }
        aspect_to += skip;

        float ar = aspect_ratios[aspect_to];
        box_width = param.minSize * sqrt(ar) / 2;
        box_height = param.minSize / sqrt(ar) / 2;

        float4 box;
        box.x = (center_x - box_width) / param.imageWidth;
        box.y = (center_y - box_height) / param.imageHeight;
        box.z = (center_x + box_width) / param.imageWidth;
        box.w = (center_y + box_height) / param.imageHeight;

        float4 res;
        if (param.clip) {
            res = fmin(fmax(box, 0.0), 1.0);
        } else {
            res = box;
        }
        outBoxTexture.write(half4(res), gid.xy, gid.z);
    }

    if (gid.z < param.numPriors) {
        // Every prior shares the same variance vector.
        varianceTexture.write(half4(variances[0]), gid.xy, gid.z);
    }
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// Parameters for the resize_bilinear* kernels, bound at [[buffer(0)]].
// NOTE(review): the layout presumably has to match the host-side struct.
struct resize_bilinear_param {
// Disabled fields, kept for reference; the kernels query the texture sizes instead.
// int32_t out_h;
// int32_t out_w;
// Multiplied by the output y coordinate to get the source y coordinate.
float ratio_h;
// Multiplied by the output x coordinate to get the source x coordinate.
float ratio_w;
};
// Bilinear resize of a float texture array.
//
// input  [[texture(0)]]  source texture.
// output [[texture(2)]]  destination; also defines the valid thread range.
// pm     [[buffer(0)]]   output-to-source coordinate scale factors.
//
// When input and output have the same width and height the texel is copied
// unchanged. Otherwise the four source texels around (gid.x * ratio_w,
// gid.y * ratio_h) are blended; neighbours past the right or bottom edge are
// replaced by the edge texel.
kernel void resize_bilinear(texture2d_array<float, access::read> input [[texture(0)]],
                            texture2d_array<float, access::write> output [[texture(2)]],
                            constant resize_bilinear_param & pm [[buffer(0)]],
                            uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output do nothing, matching the guard used by the
    // other kernels in this library; without it a padded dispatch grid would
    // read and write out of range.
    if (gid.x >= output.get_width() ||
        gid.y >= output.get_height() ||
        gid.z >= output.get_array_size()) return;

    float4 r;
    if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
        r = input.read(gid.xy, gid.z);
    } else {
        // Source position of this output texel.
        float w = gid.x * pm.ratio_w;
        float h = gid.y * pm.ratio_h;
        // Top-left neighbour (truncation) and bottom-right neighbour.
        uint w0 = w, h0 = h;
        uint w1 = w0 + 1, h1 = h0 + 1;
        // Interpolation weights towards the w1/h1 and the w0/h0 neighbours.
        float w1lambda = w - w0, h1lambda = h - h0;
        float w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
        // Replace neighbours that fall past the right or bottom edge.
        if (w1 >= input.get_width()) w1 = w0;
        if (h1 >= input.get_height()) h1 = h0;
        float4 r0 = input.read(uint2(w0, h0), gid.z);
        float4 r1 = input.read(uint2(w1, h0), gid.z);
        float4 r2 = input.read(uint2(w0, h1), gid.z);
        float4 r3 = input.read(uint2(w1, h1), gid.z);
        r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
    }
    output.write(r, gid.xy, gid.z);
}
// Bilinear resize of a half texture array.
//
// input  [[texture(0)]]  source texture.
// output [[texture(2)]]  destination; also defines the valid thread range.
// pm     [[buffer(0)]]   output-to-source coordinate scale factors.
//
// When input and output have the same width and height the texel is copied
// unchanged. Otherwise the four source texels around (gid.x * ratio_w,
// gid.y * ratio_h) are blended; neighbours past the right or bottom edge are
// replaced by the edge texel.
kernel void resize_bilinear_half(texture2d_array<half, access::read> input [[texture(0)]],
                                 texture2d_array<half, access::write> output [[texture(2)]],
                                 constant resize_bilinear_param & pm [[buffer(0)]],
                                 uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output do nothing, matching the guard used by the
    // other kernels in this library; without it a padded dispatch grid would
    // read and write out of range.
    if (gid.x >= output.get_width() ||
        gid.y >= output.get_height() ||
        gid.z >= output.get_array_size()) return;

    half4 r;
    if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
        r = input.read(gid.xy, gid.z);
    } else {
        // Source position is computed in float: half cannot represent every
        // integer above 2048 and leaves few bits for the fractional weights.
        float w = gid.x * pm.ratio_w;
        float h = gid.y * pm.ratio_h;
        // Top-left neighbour (truncation) and bottom-right neighbour.
        uint w0 = w, h0 = h;
        uint w1 = w0 + 1, h1 = h0 + 1;
        // Weights lie in [0, 1), so converting them to half is safe; the blend
        // itself stays in half like the texel data.
        half w1lambda = half(w - w0), h1lambda = half(h - h0);
        half w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
        // Replace neighbours that fall past the right or bottom edge.
        if (w1 >= input.get_width()) w1 = w0;
        if (h1 >= input.get_height()) h1 = h0;
        half4 r0 = input.read(uint2(w0, h0), gid.z);
        half4 r1 = input.read(uint2(w1, h0), gid.z);
        half4 r2 = input.read(uint2(w0, h1), gid.z);
        half4 r3 = input.read(uint2(w1, h1), gid.z);
        r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
    }
    // Single write; the original issued this identical write twice.
    output.write(r, gid.xy, gid.z);
}
...@@ -27,7 +27,7 @@ kernel void FUNC(softmax, P)(texture2d_array<P, access::read> inTexture [[textur ...@@ -27,7 +27,7 @@ kernel void FUNC(softmax, P)(texture2d_array<P, access::read> inTexture [[textur
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return; gid.z >= outTexture.get_array_size()) return;
// int zsize = inTexture.get_array_size(); // int zsize = inTexture.get_array_size();
P maxv = inTexture.read(uint2(0, gid.y), 0)[0]; P maxv = inTexture.read(uint2(0, gid.y), 0)[0];
int group = sp.K / 4; int group = sp.K / 4;
int remain = sp.K % 4; int remain = sp.K % 4;
......
...@@ -36,29 +36,29 @@ struct SplitParam { ...@@ -36,29 +36,29 @@ struct SplitParam {
//// ssd-ar: (R=3, N=2, V=y) //// ssd-ar: (R=3, N=2, V=y)
#define V VY #define V VY
#define R 3 #define R 3
#define N 2 #define N 2
#define P float #define P float
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#define P half #define P half
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
//// ssd-ar: (R=2, N=2, V=y) //// ssd-ar: (R=2, N=2, V=y)
#define V VY #define V VY
#define R 2 #define R 2
#define N 2 #define N 2
#define P float #define P float
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#define P half #define P half
#include "Split.inc.metal" #include "Split.inc.metal"
#undef P #undef P
#undef N #undef N
#undef R #undef R
#undef V #undef V
...@@ -36,28 +36,28 @@ kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[ ...@@ -36,28 +36,28 @@ kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[
} }
#define R 4 #define R 4
#define P float #define P float
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#undef R #undef R
#define R 3 #define R 3
#define P float #define P float
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#undef R #undef R
#define R 2 #define R 2
#define P float #define P float
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#define P half #define P half
#include "TransposeKernel.inc.metal" #include "TransposeKernel.inc.metal"
#undef P #undef P
#undef R #undef R
...@@ -27,8 +27,8 @@ class ViewController: UIViewController { ...@@ -27,8 +27,8 @@ class ViewController: UIViewController {
inQueue: queue inQueue: queue
) )
test.testConcat() test.testConcat()
// test.testReshape() // test.testReshape()
// test.testTranspose() // test.testTranspose()
print(" done ") print(" done ")
} }
......
...@@ -21,7 +21,7 @@ import Foundation ...@@ -21,7 +21,7 @@ import Foundation
LoadMetalInCustomMetalLib = 3 // 使用 metal 库文件 LoadMetalInCustomMetalLib = 3 // 使用 metal 库文件
} }
@objc public enum ComputePrecision: Int { @objc public enum Precision: Int {
case case
Float32 = 1, Float32 = 1,
Float16 = 2 Float16 = 2
...@@ -33,6 +33,8 @@ import Foundation ...@@ -33,6 +33,8 @@ import Foundation
@objc public static let shared: GlobalConfig = GlobalConfig.init() @objc public static let shared: GlobalConfig = GlobalConfig.init()
/// 运算精度, runner 生命周期中不可变 /// 运算精度, runner 生命周期中不可变
@objc public var computePrecision: ComputePrecision = .Float16 @objc public var computePrecision: Precision = .Float16
/// 是否开启 log
@objc public var debug: Bool = false
} }
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import Foundation import Foundation
func writeToLibrary<P: PrecisionType>(fileName: String, array: [P]) { func writeToLibrary<P: PrecisionProtocol>(fileName: String, array: [P]) {
let libraryPath = NSSearchPathForDirectoriesInDomains(.libraryDirectory, .userDomainMask, true).last ?! " library path get error " let libraryPath = NSSearchPathForDirectoriesInDomains(.libraryDirectory, .userDomainMask, true).last ?! " library path get error "
let filePath = libraryPath + "/" + fileName let filePath = libraryPath + "/" + fileName
let fileManager = FileManager.init() let fileManager = FileManager.init()
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
文件模式从 100644 更改为 100755
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册