提交 721ad393 编写于 作者: Z zhangyang0701

Merge remote-tracking branch 'upstream/develop' into develop

...@@ -33,8 +33,6 @@ ...@@ -33,8 +33,6 @@
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */ = {isa = PBXBuildFile; fileRef = FC5E03B121DCE8D90016C137 /* mingren_input_data */; }; FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */ = {isa = PBXBuildFile; fileRef = FC5E03B121DCE8D90016C137 /* mingren_input_data */; };
FC704C1921D2375300F98BAB /* super_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1721D2375300F98BAB /* super_params */; }; FC704C1921D2375300F98BAB /* super_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1721D2375300F98BAB /* super_params */; };
FC704C1A21D2375300F98BAB /* super_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1821D2375300F98BAB /* super_model */; }; FC704C1A21D2375300F98BAB /* super_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1821D2375300F98BAB /* super_model */; };
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */; };
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */; };
FC704C2421D237FC00F98BAB /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2021D237FC00F98BAB /* yolo_params */; }; FC704C2421D237FC00F98BAB /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2021D237FC00F98BAB /* yolo_params */; };
FC704C2521D237FC00F98BAB /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2121D237FC00F98BAB /* yolo_model */; }; FC704C2521D237FC00F98BAB /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2121D237FC00F98BAB /* yolo_model */; };
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; }; FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; };
...@@ -49,6 +47,9 @@ ...@@ -49,6 +47,9 @@
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; }; FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */ = {isa = PBXBuildFile; fileRef = FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */; }; FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */ = {isa = PBXBuildFile; fileRef = FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */; };
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */ = {isa = PBXBuildFile; fileRef = FCCED60421D7646E00BE8D5F /* test_image_super */; }; FCCED60521D7646E00BE8D5F /* test_image_super in Resources */ = {isa = PBXBuildFile; fileRef = FCCED60421D7646E00BE8D5F /* test_image_super */; };
FCE834AE2232A4AE0057BF43 /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */; };
FCE834AF2232A4AE0057BF43 /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */; };
FCE834B12232B6DC0057BF43 /* vision_synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCE834B02232B6DC0057BF43 /* vision_synset.txt */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; }; FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; }; FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; };
...@@ -105,8 +106,6 @@ ...@@ -105,8 +106,6 @@
FC5E03B121DCE8D90016C137 /* mingren_input_data */ = {isa = PBXFileReference; lastKnownFileType = file; path = mingren_input_data; sourceTree = "<group>"; }; FC5E03B121DCE8D90016C137 /* mingren_input_data */ = {isa = PBXFileReference; lastKnownFileType = file; path = mingren_input_data; sourceTree = "<group>"; };
FC704C1721D2375300F98BAB /* super_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_params; sourceTree = "<group>"; }; FC704C1721D2375300F98BAB /* super_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_params; sourceTree = "<group>"; };
FC704C1821D2375300F98BAB /* super_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_model; sourceTree = "<group>"; }; FC704C1821D2375300F98BAB /* super_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_model; sourceTree = "<group>"; };
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FC704C2021D237FC00F98BAB /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; }; FC704C2021D237FC00F98BAB /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; };
FC704C2121D237FC00F98BAB /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; }; FC704C2121D237FC00F98BAB /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; };
FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; }; FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; };
...@@ -121,6 +120,9 @@ ...@@ -121,6 +120,9 @@
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; }; FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */ = {isa = PBXFileReference; lastKnownFileType = "archive.metal-library"; name = "paddle-mobile-metallib.metallib"; path = "../../../../Library/Developer/Xcode/DerivedData/paddle-mobile-hdsimtkoxoondndnjczkbkchcwyh/Build/Products/Release-iphoneos/paddle-mobile-metallib.metallib"; sourceTree = "<group>"; }; FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */ = {isa = PBXFileReference; lastKnownFileType = "archive.metal-library"; name = "paddle-mobile-metallib.metallib"; path = "../../../../Library/Developer/Xcode/DerivedData/paddle-mobile-hdsimtkoxoondndnjczkbkchcwyh/Build/Products/Release-iphoneos/paddle-mobile-metallib.metallib"; sourceTree = "<group>"; };
FCCED60421D7646E00BE8D5F /* test_image_super */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_image_super; sourceTree = "<group>"; }; FCCED60421D7646E00BE8D5F /* test_image_super */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_image_super; sourceTree = "<group>"; };
FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FCE834B02232B6DC0057BF43 /* vision_synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = vision_synset.txt; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; }; FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; };
FCFADE33222F63CB0037DCE8 /* test_big.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = test_big.JPG; sourceTree = "<group>"; }; FCFADE33222F63CB0037DCE8 /* test_big.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = test_big.JPG; sourceTree = "<group>"; };
...@@ -267,22 +269,13 @@ ...@@ -267,22 +269,13 @@
FC704C1B21D237FC00F98BAB /* vision_model */ = { FC704C1B21D237FC00F98BAB /* vision_model */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCE834AB2232A4AE0057BF43 /* vision_mobilenet */,
FCAFD8482231614200496A36 /* yolo_16 */, FCAFD8482231614200496A36 /* yolo_16 */,
FC704C1C21D237FC00F98BAB /* mobilenet */,
FC704C1F21D237FC00F98BAB /* yolo */, FC704C1F21D237FC00F98BAB /* yolo */,
); );
path = vision_model; path = vision_model;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC704C1C21D237FC00F98BAB /* mobilenet */ = {
isa = PBXGroup;
children = (
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */,
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */,
);
path = mobilenet;
sourceTree = "<group>";
};
FC704C1F21D237FC00F98BAB /* yolo */ = { FC704C1F21D237FC00F98BAB /* yolo */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
...@@ -336,6 +329,16 @@ ...@@ -336,6 +329,16 @@
path = yolo_16; path = yolo_16;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FCE834AB2232A4AE0057BF43 /* vision_mobilenet */ = {
isa = PBXGroup;
children = (
FCE834B02232B6DC0057BF43 /* vision_synset.txt */,
FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */,
FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */,
);
path = vision_mobilenet;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXNativeTarget section */ /* Begin PBXNativeTarget section */
...@@ -401,8 +404,8 @@ ...@@ -401,8 +404,8 @@
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */, FCCED60521D7646E00BE8D5F /* test_image_super in Resources */,
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */, FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */, FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */,
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */,
FCAFD84B2231614200496A36 /* yolo_16_param in Resources */, FCAFD84B2231614200496A36 /* yolo_16_param in Resources */,
FCE834AF2232A4AE0057BF43 /* combined_mobilenet_model in Resources */,
FC704C1921D2375300F98BAB /* super_params in Resources */, FC704C1921D2375300F98BAB /* super_params in Resources */,
FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */, FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */, FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
...@@ -411,14 +414,15 @@ ...@@ -411,14 +414,15 @@
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */, FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */,
FC704C1A21D2375300F98BAB /* super_model in Resources */, FC704C1A21D2375300F98BAB /* super_model in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */, FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
FCE834B12232B6DC0057BF43 /* vision_synset.txt in Resources */,
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */, FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */,
FCAFD84C2231614200496A36 /* yolo_16_model in Resources */, FCAFD84C2231614200496A36 /* yolo_16_model in Resources */,
FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */, FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */,
FC203FB221CBFDBA00B37166 /* test.jpg in Resources */, FC203FB221CBFDBA00B37166 /* test.jpg in Resources */,
FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */, FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */,
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */,
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */, FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */,
FC704C2421D237FC00F98BAB /* yolo_params in Resources */, FC704C2421D237FC00F98BAB /* yolo_params in Resources */,
FCE834AE2232A4AE0057BF43 /* combined_mobilenet_params in Resources */,
FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */, FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */,
FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */, FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */,
FC704C2521D237FC00F98BAB /* yolo_model in Resources */, FC704C2521D237FC00F98BAB /* yolo_model in Resources */,
......
...@@ -24,10 +24,35 @@ public class MobileNetCombined: Net { ...@@ -24,10 +24,35 @@ public class MobileNetCombined: Net {
inputDim = Dim.init(inDim: [1, 224, 224, 3]) inputDim = Dim.init(inDim: [1, 224, 224, 3])
metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib") metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
useMPS = true
}
let labels = PreWords.init(fileName: "vision_synset")
/// Loads a newline-separated text resource from a bundle and exposes the
/// retained entries by integer index.
///
/// Only lines longer than 10 characters are kept, and each kept line has its
/// first 10 characters removed before being stored.
class PreWords {
/// Entries retained from the resource, in file order.
var contents: [String] = []
/// Reads the resource `fileName` with extension `type` from `inBundle`.
///
/// - Parameters:
///   - fileName: Resource name without extension.
///   - type: Resource extension; defaults to "txt".
///   - inBundle: Bundle to search; defaults to the main bundle.
///
/// Traps with `fatalError` when the bundle has no such resource, and traps
/// through `try!` when the file cannot be read as a string.
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
// Drop lines of 10 characters or fewer, then strip the first 10 characters
// of every remaining line.
// NOTE(review): presumably the stripped prefix is a fixed-width synset id
// plus a separator — confirm against vision_synset.txt.
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
}
}else{
fatalError("no file call \(fileName)")
}
}
/// Returns the entry at `index`. No bounds check is performed here, so an
/// out-of-range index traps in the underlying array access.
subscript(index: Int) -> String {
return contents[index]
}
} }
override public func resultStr(res: [ResultHolder]) -> String { override public func resultStr(res: [ResultHolder]) -> String {
return " \(res[0].result[0]) ... " let firstRes = res[0]
let resPointer = firstRes.result
var s: [String] = []
(0..<firstRes.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
} }
} }
...@@ -25,7 +25,7 @@ public class YoloNet: Net { ...@@ -25,7 +25,7 @@ public class YoloNet: Net {
inputDim = Dim.init(inDim: [1, 416, 416, 3]) inputDim = Dim.init(inDim: [1, 416, 416, 3])
metalLoadMode = .LoadMetalInCustomMetalLib metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib") metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
useMPS = false useMPS = true
paramPrecision = .Float16 paramPrecision = .Float16
} }
......
...@@ -354,7 +354,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -354,7 +354,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size(); uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z]; float4 output = float4(biase[gid.z]);
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
...@@ -385,7 +385,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -385,7 +385,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
} }
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(half4(output), gid.xy, gid.z);
} }
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]], kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
...@@ -406,7 +406,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in ...@@ -406,7 +406,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9; const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4; uint weithTo = gid.z * kernelHXW * 4;
half4 output = biase[gid.z]; float4 output = float4(biase[gid.z]);
half4 inputs[9]; half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
...@@ -419,13 +419,13 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in ...@@ -419,13 +419,13 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) { for (int j = 0; j < 9; ++j) {
half4 input = inputs[j]; half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]);
output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]);
output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]);
output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; output.w += float(input.w) * float(weights[weithTo + 3 * kernelHXW + j]);
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(half4(output), gid.xy, gid.z);
} }
...@@ -453,7 +453,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -453,7 +453,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z]; float4 output = float4(biase[gid.z]);
ushort dilation_y = param.dilationY; ushort dilation_y = param.dilationY;
half4 input[5]; half4 input[5];
...@@ -471,20 +471,20 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -471,20 +471,20 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
for (int j = 0; j < 5; ++j) { for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x); output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y); output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z); output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w); output.w += dot(float4(input[j]), float4(weight_w));
} }
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(half4(output), gid.xy, gid.z);
} }
...@@ -512,7 +512,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -512,7 +512,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4; uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z]; float4 output = float4(biase[gid.z]);
ushort dilation_x = param.dilationX; ushort dilation_x = param.dilationX;
half4 input[5]; half4 input[5];
...@@ -530,20 +530,20 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[ ...@@ -530,20 +530,20 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
for (int j = 0; j < 5; ++j) { for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x); output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y); output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z); output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w); output.w += dot(float4(input[j]), float4(weight_w));
} }
} }
// output = output + float4(biase[gid.z]); // output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z); outTexture.write(half4(output), gid.xy, gid.z);
} }
......
...@@ -117,10 +117,9 @@ public class Executor<P: PrecisionProtocol>: Executorable{ ...@@ -117,10 +117,9 @@ public class Executor<P: PrecisionProtocol>: Executorable{
//将输入写进文件 //将输入写进文件
/* /*
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(dim) print(dim)
writeToLibrary(fileName: "yolo_input", array: inputArr) writeToLibrary(fileName: "mobilenet_input", array: inputArr)
print(" write done ") print(" write done ")
return return
*/ */
......
...@@ -27,6 +27,78 @@ func getUniqueKey() -> String { ...@@ -27,6 +27,78 @@ func getUniqueKey() -> String {
return UUID.init().uuidString return UUID.init().uuidString
} }
/// Feeds convolution weights and bias terms held in `Tensor<P>` objects to
/// `MPSCNNConvolution` through the `MPSCNNConvolutionDataSource` protocol.
///
/// Bias handling depends on `P.precisionType`:
/// - `.Float32`: `_biasTerms` borrows the bias tensor's own storage; this
///   class never frees it.
/// - `.Float16`: `_biasTerms` is a buffer allocated by `load()` holding the
///   bias values widened to `Float`; it is owned by this object and released
///   by `purge()` or `deinit`.
@available(iOS 11.0, *)
class ConvDataSource<P: PrecisionProtocol>: NSObject, MPSCNNConvolutionDataSource {
    var _descriptor: MPSCNNConvolutionDescriptor
    var _weightsTensor: Tensor<P>
    var _biasTensor: Tensor<P>
    /// Bias values handed to MPS. `nil` until `load()` succeeds and again
    /// after `purge()` in the Float16 case.
    var _biasTerms: UnsafeMutablePointer<Float>?

    /// - Parameters:
    ///   - inDesc: Descriptor returned verbatim from `descriptor()`.
    ///   - inWeights: Tensor whose `data.pointer` is returned from `weights()`.
    ///   - inBiasTerms: Tensor the bias terms are read from in `load()`.
    init(inDesc: MPSCNNConvolutionDescriptor,
         inWeights: Tensor<P>,
         inBiasTerms: Tensor<P>) {
        _descriptor = inDesc
        _weightsTensor = inWeights
        _biasTensor = inBiasTerms
        super.init()
    }

    deinit {
        // Covers the case where the object is destroyed without a balancing
        // purge(); otherwise the Float16 conversion buffer would leak.
        releaseOwnedBiasTerms()
    }

    /// Makes the bias terms available for `biasTerms()`.
    ///
    /// - Returns: `true` on success; `false` when the Float16 bias storage
    ///   cannot be viewed as `UnsafeMutablePointer<Float16>`, so that an
    ///   uninitialised buffer is never exposed.
    func load() -> Bool {
        switch P.precisionType {
        case .Float32:
            _biasTerms = _biasTensor.data.pointer as? UnsafeMutablePointer<Float>
        case .Float16:
            guard let float16Point = _biasTensor.data.pointer as? UnsafeMutablePointer<Float16> else {
                return false
            }
            let count = _biasTensor.data.count
            // Reuse the buffer from an earlier load() that has not been
            // purged yet: this avoids leaking it and keeps any pointer that
            // was already handed out valid.
            let destination = _biasTerms ?? UnsafeMutablePointer<Float>.allocate(capacity: count)
            float16to32(input: float16Point, output: destination, count: count)
            _biasTerms = destination
        }
        return true
    }

    /// Releases what `load()` allocated. Safe to call repeatedly.
    func purge() {
        releaseOwnedBiasTerms()
    }

    func label() -> String? {
        return "conv_add_label"
    }

    /// Returns `self` rather than a distinct copy, so every "copy" shares the
    /// same tensors and bias buffer.
    func copy(with zone: NSZone? = nil) -> Any {
        return self
    }

    func descriptor() -> MPSCNNConvolutionDescriptor {
        return _descriptor
    }

    /// Element type of the memory returned by `weights()`.
    func dataType() -> MPSDataType {
        switch P.precisionType {
        case .Float32:
            return .float32
        case .Float16:
            return .float16
        }
    }

    func weights() -> UnsafeMutableRawPointer {
        return UnsafeMutableRawPointer.init(_weightsTensor.data.pointer)
    }

    /// Bias terms prepared by `load()`, or `nil` when none are loaded.
    func biasTerms() -> UnsafeMutablePointer<Float>? {
        return _biasTerms
    }

    /// Frees the Float16 conversion buffer, if any, and clears `_biasTerms`
    /// so no dangling pointer is returned or freed twice. The Float32
    /// pointer is borrowed from the bias tensor and is left untouched.
    private func releaseOwnedBiasTerms() {
        switch P.precisionType {
        case .Float32:
            return
        case .Float16:
            guard let owned = _biasTerms else {
                return
            }
            owned.deinitialize(count: _biasTensor.data.count)
            owned.deallocate()
            _biasTerms = nil
        }
    }
}
class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable { class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
...@@ -40,30 +112,37 @@ class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable { ...@@ -40,30 +112,37 @@ class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
let offsetX = (Int(param.dilations[0]) * (param.filter.tensorDim[3] - 1) + 1)/2 - Int(param.paddings[0]) let offsetX = (Int(param.dilations[0]) * (param.filter.tensorDim[3] - 1) + 1)/2 - Int(param.paddings[0])
let key = identifyingKey let key = identifyingKey
if initContext.useMPS {
if #available(iOS 10.0, *) {
if !(param.filter.tensorDim[1] == 1 && param.filter.tensorDim[0] == param.input.tensorDim[1]) && param.input.tensorDim[1] > 4 && param.output.tensorDim[1] > 4 {
let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.tensorDim[3], if initContext.useMPS { // 使用 apple 的 MetalPerformanceShaders
if #available(iOS 11.0, *) {
var desc: MPSCNNConvolutionDescriptor?
// 如果不是 depth wise, 并且输入输出 tensor channel 都大于 4
if !(param.filter.tensorDim[1] == 1 && param.filter.tensorDim[0] == param.input.tensorDim[1]) && param.input.tensorDim[1] > 4 && param.output.tensorDim[1] > 4 {
desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.tensorDim[3],
kernelHeight: param.filter.tensorDim[2],
inputFeatureChannels: param.input.tensorDim[1],
outputFeatureChannels: param.output.tensorDim[1],
neuronFilter: nil)
desc?.strideInPixelsX = Int(param.stride[0])
desc?.strideInPixelsY = Int(param.stride[1])
} else if param.input.tensorDim[1] > 4 && param.output.tensorDim[1] > 4 {
desc = MPSCNNDepthWiseConvolutionDescriptor(kernelWidth: param.filter.tensorDim[3],
kernelHeight: param.filter.tensorDim[2], kernelHeight: param.filter.tensorDim[2],
inputFeatureChannels: param.input.tensorDim[1], inputFeatureChannels: param.input.tensorDim[1],
outputFeatureChannels: param.output.tensorDim[1], outputFeatureChannels: param.output.tensorDim[1],
neuronFilter: nil) neuronFilter: nil)
desc.strideInPixelsX = Int(param.stride[0])
desc.strideInPixelsY = Int(param.stride[1])
let tensorPointer = param.filter.convert(converter: MPSPointerConverter<P>.init()) }
let yPointer = param.y.data.pointer
tensorPointer.withMemoryRebound(to: Float.self, capacity: param.filter.numel()) { (weightPointer: UnsafeMutablePointer<Float>) in desc?.strideInPixelsX = Int(param.stride[0])
yPointer.withMemoryRebound(to: Float.self, capacity: param.y.numel(), { (biasePointer: UnsafeMutablePointer<Float>) in desc?.strideInPixelsY = Int(param.stride[1])
let conv = MPSCNNConvolution.init(device: device, convolutionDescriptor: desc, kernelWeights: weightPointer, biasTerms: biasePointer, flags: .none) if let inDesc = desc {
let _ = param.filter.convert(converter: MPSPointerConverter<P>.init())
let dataSource = ConvDataSource.init(inDesc: inDesc, inWeights: param.filter, inBiasTerms: param.y)
let conv = MPSCNNConvolution.init(device: device, weights: dataSource)
conv.offset = MPSOffset.init(x: offsetX, y: offsetY, z: 0) conv.offset = MPSOffset.init(x: offsetX, y: offsetY, z: 0)
conv.edgeMode = .zero conv.edgeMode = .zero
convDic[key] = conv convDic[key] = conv
})
}
imageDic[identifyingKey + "_input"] = MPSImage.init(texture: param.input.metalTexture, featureChannels: param.input.tensorDim[1]) imageDic[identifyingKey + "_input"] = MPSImage.init(texture: param.input.metalTexture, featureChannels: param.input.tensorDim[1])
imageDic[identifyingKey + "_output"] = MPSImage.init(texture: param.output.metalTexture, featureChannels: param.output.tensorDim[1]) imageDic[identifyingKey + "_output"] = MPSImage.init(texture: param.output.metalTexture, featureChannels: param.output.tensorDim[1])
super.init(device: device, inFunctionName: "place_holder", initContext: initContext) super.init(device: device, inFunctionName: "place_holder", initContext: initContext)
......
...@@ -25,11 +25,11 @@ int main() { ...@@ -25,11 +25,11 @@ int main() {
paddle_mobile.SetCLPath("/data/local/tmp/bin"); paddle_mobile.SetCLPath("/data/local/tmp/bin");
#endif #endif
// auto isok = auto isok = paddle_mobile.Load(
// paddle_mobile.Load(std::string(g_mobilenet_mul) + "/model", std::string(g_mobilenet_vision) + "/vision_mobilenet_model",
// std::string(g_mobilenet_mul) + "/params", true); std::string(g_mobilenet_vision) + "/vision_mobilenet_params", true);
auto isok = paddle_mobile.Load(std::string(g_mobilenet), true); // auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
if (isok) { if (isok) {
auto time2 = paddle_mobile::time(); auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms" std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
...@@ -37,12 +37,13 @@ int main() { ...@@ -37,12 +37,13 @@ int main() {
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims); GetInput<float>(g_test_image_1x3x224x224_vision_mobilenet_input, &input,
dims);
std::vector<float> vec_result = paddle_mobile.Predict(input, dims); std::vector<float> vec_result = paddle_mobile.Predict(input, dims);
auto time3 = paddle_mobile::time(); auto time3 = paddle_mobile::time();
int max = 10; int max = 1;
for (int i = 0; i < max; ++i) { for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile.Predict(input, dims); vec_result = paddle_mobile.Predict(input, dims);
} }
......
...@@ -20,14 +20,18 @@ int main() { ...@@ -20,14 +20,18 @@ int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4); paddle_mobile.SetThreadNum(4);
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
std::string(g_mobilenet_combined) + "/params", true)) { if (paddle_mobile.Load(
std::string(g_mobilenet_vision) + "/vision_mobilenet_model",
std::string(g_mobilenet_vision) + "/vision_mobilenet_params", true)) {
auto time2 = time(); auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
GetInput<float>(g_test_image_1x3x224x224_vision_mobilenet_input, &input,
dims);
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest = std::vector<float>::iterator biggest =
...@@ -39,8 +43,9 @@ int main() { ...@@ -39,8 +43,9 @@ int main() {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
} }
auto time3 = time(); auto time3 = time();
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
} }
auto time4 = time(); auto time4 = time();
......
...@@ -23,15 +23,15 @@ int main() { ...@@ -23,15 +23,15 @@ int main() {
// ../../../test/models/mobilenet // ../../../test/models/mobilenet
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(std::string(g_yolo_combined) + "/model", if (paddle_mobile.Load(std::string(g_yolo_vision) + "/model",
std::string(g_yolo_combined) + "/params", true)) { std::string(g_yolo_vision) + "/params", true)) {
auto time2 = time(); auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<int64_t> dims{1, 3, 416, 416}; std::vector<int64_t> dims{1, 3, 416, 416};
std::vector<float> input; std::vector<float> input;
GetInput<float>(g_test_image_desktop_1_3_416_416_nchw_float, &input, dims); GetInput<float>(g_test_image_1x3x416x416_vision_yolo_input, &input, dims);
std::cout << "input.size(): " << input.size() << std::endl; std::cout << "input.size(): " << input.size() << std::endl;
for (int j = 0; j < 100; ++j) { for (int j = 0; j < 100; ++j) {
std::cout << j << " : " << input[j] << std::endl; std::cout << j << " : " << input[j] << std::endl;
...@@ -42,13 +42,6 @@ int main() { ...@@ -42,13 +42,6 @@ int main() {
// } // }
auto time3 = time(); auto time3 = time();
const vector<float> vector_out = paddle_mobile.Predict(input, dims); const vector<float> vector_out = paddle_mobile.Predict(input, dims);
std::cout << "--------------------------------------------" << std::endl;
for (float i : vector_out) {
std::cout << i << std::endl;
}
std::cout << "--------------------------------------------" << std::endl;
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
......
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <iostream> #include <iostream>
#include <thread> #include <thread> // NOLINT
#include "../../src/common/types.h" #include "../../src/common/types.h"
#include "../../src/io/paddle_test_inference_api.h" #include "../../src/io/paddle_test_inference_api.h"
#include "../test_helper.h" #include "../test_helper.h"
...@@ -31,8 +31,9 @@ void t1() { ...@@ -31,8 +31,9 @@ void t1() {
paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin"); paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
#endif #endif
auto time1 = paddle_mobile::time(); auto time1 = paddle_mobile::time();
auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model", auto isok =
std::string(g_yolo_mul) + "/params", true); paddle_mobile_gpu.Load(std::string(g_yolo_vision) + "/model",
std::string(g_yolo_vision) + "/params", true);
// auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true); // auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
if (isok) { if (isok) {
...@@ -42,13 +43,13 @@ void t1() { ...@@ -42,13 +43,13 @@ void t1() {
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 416, 416}; std::vector<int64_t> dims{1, 3, 416, 416};
GetInput<float>(g_yolo_img, &input, dims); GetInput<float>(g_test_image_1x3x416x416_vision_yolo_input, &input, dims);
std::vector<float> vec_result; std::vector<float> vec_result;
// = paddle_mobile.Predict(input, dims); // = paddle_mobile.Predict(input, dims);
auto time3 = paddle_mobile::time(); auto time3 = paddle_mobile::time();
int max = 10; int max = 1;
for (int i = 0; i < max; ++i) { for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile_gpu.Predict(input, dims); vec_result = paddle_mobile_gpu.Predict(input, dims);
} }
...@@ -129,9 +130,9 @@ void t2() { ...@@ -129,9 +130,9 @@ void t2() {
void t3() { void t3() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
// paddle_mobile.SetThreadNum(4); // paddle_mobile.SetThreadNum(4);
//#ifdef PADDLE_MOBILE_CL // #ifdef PADDLE_MOBILE_CL
// paddle_mobile.SetCLPath("/data/local/tmp/bin"); // paddle_mobile.SetCLPath("/data/local/tmp/bin");
//#endif // #endif
auto time1 = paddle_mobile::time(); auto time1 = paddle_mobile::time();
auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model", auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
std::string(g_yolo_mul) + "/params", true); std::string(g_yolo_mul) + "/params", true);
......
...@@ -51,6 +51,8 @@ static const char *g_yolo_combined = "../models/yolo_combined"; ...@@ -51,6 +51,8 @@ static const char *g_yolo_combined = "../models/yolo_combined";
static const char *g_yolo_mul = "../models/d"; static const char *g_yolo_mul = "../models/d";
static const char *g_fluid_fssd_new = "../models/fluid_fssd_new"; static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
static const char *g_vgg16_ssd_combined = "../models/vgg16_ssd_combined"; static const char *g_vgg16_ssd_combined = "../models/vgg16_ssd_combined";
static const char *g_mobilenet_vision = "../models/vision_mobilenet";
static const char *g_yolo_vision = "../models/vision_yolo";
static const char *g_test_image_1x3x224x224 = static const char *g_test_image_1x3x224x224 =
"../images/test_image_1x3x224x224_float"; "../images/test_image_1x3x224x224_float";
static const char *g_test_image_1x3x224x224_banana = static const char *g_test_image_1x3x224x224_banana =
...@@ -65,10 +67,14 @@ static const char *g_img = "../images/img.bin"; ...@@ -65,10 +67,14 @@ static const char *g_img = "../images/img.bin";
static const char *g_yolo_img = "../images/in_put_1_3_416_416_2"; static const char *g_yolo_img = "../images/in_put_1_3_416_416_2";
static const char *g_super_img = "../images/mingren_input_data"; static const char *g_super_img = "../images/mingren_input_data";
static const char *g_mobilenet_img = "../images/image"; static const char *g_mobilenet_img = "../images/image";
static const char *g_test_image_1x3x224x224_vision_mobilenet_input =
"../images/vision_mobilenet_input";
static const char *g_test_image_1x3x416x416_vision_yolo_input =
"../images/yolo_input";
using paddle_mobile::framework::DDim; using paddle_mobile::framework::DDim;
using paddle_mobile::framework::Tensor; using paddle_mobile::framework::Tensor;
using namespace paddle_mobile; using namespace paddle_mobile; // NOLINT
template <typename T> template <typename T>
void SetupTensor(paddle_mobile::framework::Tensor *input, void SetupTensor(paddle_mobile::framework::Tensor *input,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册