提交 ad8c0911 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1481 from codeWorm2015/develop

add mps support
......@@ -33,8 +33,6 @@
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */ = {isa = PBXBuildFile; fileRef = FC5E03B121DCE8D90016C137 /* mingren_input_data */; };
FC704C1921D2375300F98BAB /* super_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1721D2375300F98BAB /* super_params */; };
FC704C1A21D2375300F98BAB /* super_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1821D2375300F98BAB /* super_model */; };
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */; };
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */; };
FC704C2421D237FC00F98BAB /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2021D237FC00F98BAB /* yolo_params */; };
FC704C2521D237FC00F98BAB /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2121D237FC00F98BAB /* yolo_model */; };
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; };
......@@ -49,6 +47,9 @@
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */ = {isa = PBXBuildFile; fileRef = FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */; };
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */ = {isa = PBXBuildFile; fileRef = FCCED60421D7646E00BE8D5F /* test_image_super */; };
FCE834AE2232A4AE0057BF43 /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */; };
FCE834AF2232A4AE0057BF43 /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */; };
FCE834B12232B6DC0057BF43 /* vision_synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCE834B02232B6DC0057BF43 /* vision_synset.txt */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; };
......@@ -105,8 +106,6 @@
FC5E03B121DCE8D90016C137 /* mingren_input_data */ = {isa = PBXFileReference; lastKnownFileType = file; path = mingren_input_data; sourceTree = "<group>"; };
FC704C1721D2375300F98BAB /* super_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_params; sourceTree = "<group>"; };
FC704C1821D2375300F98BAB /* super_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_model; sourceTree = "<group>"; };
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FC704C2021D237FC00F98BAB /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; };
FC704C2121D237FC00F98BAB /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; };
FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; };
......@@ -121,6 +120,9 @@
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCC15E14221E716400DC3CB2 /* paddle-mobile-metallib.metallib */ = {isa = PBXFileReference; lastKnownFileType = "archive.metal-library"; name = "paddle-mobile-metallib.metallib"; path = "../../../../Library/Developer/Xcode/DerivedData/paddle-mobile-hdsimtkoxoondndnjczkbkchcwyh/Build/Products/Release-iphoneos/paddle-mobile-metallib.metallib"; sourceTree = "<group>"; };
FCCED60421D7646E00BE8D5F /* test_image_super */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_image_super; sourceTree = "<group>"; };
FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FCE834B02232B6DC0057BF43 /* vision_synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = vision_synset.txt; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; };
FCFADE33222F63CB0037DCE8 /* test_big.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = test_big.JPG; sourceTree = "<group>"; };
......@@ -267,22 +269,13 @@
FC704C1B21D237FC00F98BAB /* vision_model */ = {
isa = PBXGroup;
children = (
FCE834AB2232A4AE0057BF43 /* vision_mobilenet */,
FCAFD8482231614200496A36 /* yolo_16 */,
FC704C1C21D237FC00F98BAB /* mobilenet */,
FC704C1F21D237FC00F98BAB /* yolo */,
);
path = vision_model;
sourceTree = "<group>";
};
FC704C1C21D237FC00F98BAB /* mobilenet */ = {
isa = PBXGroup;
children = (
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */,
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */,
);
path = mobilenet;
sourceTree = "<group>";
};
FC704C1F21D237FC00F98BAB /* yolo */ = {
isa = PBXGroup;
children = (
......@@ -336,6 +329,16 @@
path = yolo_16;
sourceTree = "<group>";
};
FCE834AB2232A4AE0057BF43 /* vision_mobilenet */ = {
isa = PBXGroup;
children = (
FCE834B02232B6DC0057BF43 /* vision_synset.txt */,
FCE834AC2232A4AE0057BF43 /* combined_mobilenet_params */,
FCE834AD2232A4AE0057BF43 /* combined_mobilenet_model */,
);
path = vision_mobilenet;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
......@@ -401,8 +404,8 @@
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */,
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */,
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */,
FCAFD84B2231614200496A36 /* yolo_16_param in Resources */,
FCE834AF2232A4AE0057BF43 /* combined_mobilenet_model in Resources */,
FC704C1921D2375300F98BAB /* super_params in Resources */,
FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
......@@ -411,14 +414,15 @@
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */,
FC704C1A21D2375300F98BAB /* super_model in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
FCE834B12232B6DC0057BF43 /* vision_synset.txt in Resources */,
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */,
FCAFD84C2231614200496A36 /* yolo_16_model in Resources */,
FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */,
FC203FB221CBFDBA00B37166 /* test.jpg in Resources */,
FCC15E15221E716500DC3CB2 /* paddle-mobile-metallib.metallib in Resources */,
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */,
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */,
FC704C2421D237FC00F98BAB /* yolo_params in Resources */,
FCE834AE2232A4AE0057BF43 /* combined_mobilenet_params in Resources */,
FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */,
FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */,
FC704C2521D237FC00F98BAB /* yolo_model in Resources */,
......
......@@ -24,10 +24,35 @@ public class MobileNetCombined: Net {
inputDim = Dim.init(inDim: [1, 224, 224, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
useMPS = true
}
let labels = PreWords.init(fileName: "vision_synset")
/// Table of human-readable class labels loaded from a bundled text resource.
///
/// The resource is split into lines; only lines longer than 10 characters are
/// kept, and the first 10 characters of each kept line are stripped before the
/// remainder is stored in `contents`.
class PreWords {
  /// Label text of every accepted line, in file order.
  var contents: [String] = []

  /// Loads `fileName.type` from `inBundle` and builds the label table.
  ///
  /// Terminates the process when the resource is missing from the bundle or
  /// cannot be read as text.
  init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
    guard let resourcePath = inBundle.path(forResource: fileName, ofType: type) else {
      fatalError("no file call \(fileName)")
    }
    let rawText = try! String.init(contentsOfFile: resourcePath)
    let lines = rawText.components(separatedBy: CharacterSet.newlines)
    // Keep only lines that still have text after the 10-character prefix.
    contents = lines.compactMap { line -> String? in
      guard line.count > 10 else { return nil }
      return String(line.dropFirst(10))
    }
  }

  /// Returns the label stored at `index` (traps when out of range).
  subscript(index: Int) -> String {
    return contents[index]
  }
}
/// Formats the top five entries of the first result holder as display text.
///
/// Each output line has the form `"<rank>: <label> (<score>%)"`, and lines are
/// joined with newlines. Returns an empty string when `res` is empty.
///
/// NOTE(review): `top(r:)` is defined elsewhere; this code assumes it yields
/// `(index, value)` pairs, with the index used to look up `labels` — confirm.
override public func resultStr(res: [ResultHolder]) -> String {
  // A stale early `return " \(res[0].result[0]) ... "` used to sit here and
  // made everything below unreachable; it has been removed.
  guard let firstRes = res.first else {
    return ""
  }
  let resPointer = firstRes.result
  var s: [String] = []
  (0..<firstRes.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach {
    // $0 is the zero-based rank; $1 is the pair produced by top(r:).
    s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
  }
  return s.joined(separator: "\n")
}
}
......@@ -25,7 +25,7 @@ public class YoloNet: Net {
inputDim = Dim.init(inDim: [1, 416, 416, 3])
metalLoadMode = .LoadMetalInCustomMetalLib
metalLibPath = Bundle.main.path(forResource: "paddle-mobile-metallib", ofType: "metallib")
useMPS = false
useMPS = true
paramPrecision = .Float16
}
......
......@@ -354,7 +354,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
float4 output = float4(biase[gid.z]);
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
......@@ -385,7 +385,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
......@@ -406,7 +406,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
half4 output = biase[gid.z];
float4 output = float4(biase[gid.z]);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
......@@ -419,13 +419,13 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]);
output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]);
output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]);
output.w += float(input.w) * float(weights[weithTo + 3 * kernelHXW + j]);
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
outTexture.write(half4(output), gid.xy, gid.z);
}
......@@ -453,7 +453,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
float4 output = float4(biase[gid.z]);
ushort dilation_y = param.dilationY;
half4 input[5];
......@@ -471,20 +471,20 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
output.w += dot(float4(input[j]), float4(weight_w));
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
outTexture.write(half4(output), gid.xy, gid.z);
}
......@@ -512,7 +512,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
float4 output = float4(biase[gid.z]);
ushort dilation_x = param.dilationX;
half4 input[5];
......@@ -530,20 +530,20 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
output.w += dot(float4(input[j]), float4(weight_w));
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
outTexture.write(half4(output), gid.xy, gid.z);
}
......
......@@ -117,10 +117,9 @@ public class Executor<P: PrecisionProtocol>: Executorable{
//将输入写进文件
/*
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(dim)
writeToLibrary(fileName: "yolo_input", array: inputArr)
writeToLibrary(fileName: "mobilenet_input", array: inputArr)
print(" write done ")
return
*/
......
......@@ -27,6 +27,78 @@ func getUniqueKey() -> String {
return UUID.init().uuidString
}
/// Data source that hands convolution weights and bias terms to
/// `MPSCNNConvolution`.
///
/// Weights are exposed directly from `_weightsTensor`. Bias terms are always
/// exposed as `Float`: for `.Float32` the tensor's own storage is borrowed,
/// for `.Float16` a temporary `Float` buffer is allocated in `load()` and
/// released in `purge()` (or in `deinit` if `purge()` was never called).
@available(iOS 11.0, *)
class ConvDataSource<P: PrecisionProtocol>: NSObject, MPSCNNConvolutionDataSource {
  var _descriptor: MPSCNNConvolutionDescriptor
  var _weightsTensor: Tensor<P>
  var _biasTensor: Tensor<P>
  /// Bias values as `Float`. Owned by this object only in the `.Float16`
  /// case; in the `.Float32` case it aliases `_biasTensor`'s storage.
  var _biasTerms: UnsafeMutablePointer<Float>?

  /// Prepares the bias buffer. Returns `false` if the half-precision bias
  /// storage cannot be viewed as `Float16`.
  func load() -> Bool {
    switch P.precisionType {
    case .Float32:
      _biasTerms = _biasTensor.data.pointer as? UnsafeMutablePointer<Float>
    case .Float16:
      // load() may be invoked again without an intervening purge(); free the
      // previous conversion buffer first so it is not leaked.
      releaseOwnedBiasTerms()
      guard let float16Point = _biasTensor.data.pointer as? UnsafeMutablePointer<Float16> else {
        // Do not report success with an uninitialized buffer.
        return false
      }
      let converted = UnsafeMutablePointer<Float>.allocate(capacity: _biasTensor.data.count)
      float16to32(input: float16Point, output: converted, count: _biasTensor.data.count)
      _biasTerms = converted
    }
    return true
  }

  /// Releases the conversion buffer created by `load()`. Safe to call more
  /// than once.
  func purge() {
    switch P.precisionType {
    case .Float32:
      // The pointer is borrowed from the tensor; nothing to free.
      return
    case .Float16:
      releaseOwnedBiasTerms()
    }
  }

  func label() -> String? {
    return "conv_add_label"
  }

  /// Returns `self` rather than a new instance, so the single owned bias
  /// buffer is never shared between two owners.
  func copy(with zone: NSZone? = nil) -> Any {
    return self
  }

  init(inDesc: MPSCNNConvolutionDescriptor,
       inWeights: Tensor<P>,
       inBiasTerms: Tensor<P>) {
    _descriptor = inDesc
    _weightsTensor = inWeights
    _biasTensor = inBiasTerms
    super.init()
  }

  deinit {
    // Covers the case where the data source is dropped without purge().
    purge()
  }

  func descriptor() -> MPSCNNConvolutionDescriptor {
    return _descriptor
  }

  func dataType() -> MPSDataType {
    switch P.precisionType {
    case .Float32:
      return .float32
    case .Float16:
      return .float16
    }
  }

  func weights() -> UnsafeMutableRawPointer {
    return UnsafeMutableRawPointer.init(_weightsTensor.data.pointer)
  }

  func biasTerms() -> UnsafeMutablePointer<Float>? {
    return _biasTerms
  }

  /// Frees the `Float` buffer allocated for half-precision biases and clears
  /// the stored pointer so it can neither dangle nor be freed twice.
  /// Only call this when the buffer is owned (the `.Float16` case).
  private func releaseOwnedBiasTerms() {
    guard let owned = _biasTerms else {
      return
    }
    owned.deinitialize(count: _biasTensor.data.count)
    owned.deallocate()
    _biasTerms = nil
  }
}
class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
var metalParam: MetalConvParam!
......@@ -40,30 +112,37 @@ class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
let offsetX = (Int(param.dilations[0]) * (param.filter.tensorDim[3] - 1) + 1)/2 - Int(param.paddings[0])
let key = identifyingKey
if initContext.useMPS {
if #available(iOS 10.0, *) {
if initContext.useMPS { // 使用 apple 的 MetalPerformanceShaders
if #available(iOS 11.0, *) {
var desc: MPSCNNConvolutionDescriptor?
// 如果不是 depth wise, 并且输入输出 tensor channel 都大于 4
if !(param.filter.tensorDim[1] == 1 && param.filter.tensorDim[0] == param.input.tensorDim[1]) && param.input.tensorDim[1] > 4 && param.output.tensorDim[1] > 4 {
let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.tensorDim[3],
desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.tensorDim[3],
kernelHeight: param.filter.tensorDim[2],
inputFeatureChannels: param.input.tensorDim[1],
outputFeatureChannels: param.output.tensorDim[1],
neuronFilter: nil)
desc.strideInPixelsX = Int(param.stride[0])
desc.strideInPixelsY = Int(param.stride[1])
let tensorPointer = param.filter.convert(converter: MPSPointerConverter<P>.init())
let yPointer = param.y.data.pointer
tensorPointer.withMemoryRebound(to: Float.self, capacity: param.filter.numel()) { (weightPointer: UnsafeMutablePointer<Float>) in
yPointer.withMemoryRebound(to: Float.self, capacity: param.y.numel(), { (biasePointer: UnsafeMutablePointer<Float>) in
let conv = MPSCNNConvolution.init(device: device, convolutionDescriptor: desc, kernelWeights: weightPointer, biasTerms: biasePointer, flags: .none)
conv.offset = MPSOffset.init(x: offsetX, y: offsetY, z: 0)
conv.edgeMode = .zero
convDic[key] = conv
})
}
desc?.strideInPixelsX = Int(param.stride[0])
desc?.strideInPixelsY = Int(param.stride[1])
} else if param.input.tensorDim[1] > 4 && param.output.tensorDim[1] > 4 {
desc = MPSCNNDepthWiseConvolutionDescriptor(kernelWidth: param.filter.tensorDim[3],
kernelHeight: param.filter.tensorDim[2],
inputFeatureChannels: param.input.tensorDim[1],
outputFeatureChannels: param.output.tensorDim[1],
neuronFilter: nil)
}
desc?.strideInPixelsX = Int(param.stride[0])
desc?.strideInPixelsY = Int(param.stride[1])
if let inDesc = desc {
let _ = param.filter.convert(converter: MPSPointerConverter<P>.init())
let dataSource = ConvDataSource.init(inDesc: inDesc, inWeights: param.filter, inBiasTerms: param.y)
let conv = MPSCNNConvolution.init(device: device, weights: dataSource)
conv.offset = MPSOffset.init(x: offsetX, y: offsetY, z: 0)
conv.edgeMode = .zero
convDic[key] = conv
imageDic[identifyingKey + "_input"] = MPSImage.init(texture: param.input.metalTexture, featureChannels: param.input.tensorDim[1])
imageDic[identifyingKey + "_output"] = MPSImage.init(texture: param.output.metalTexture, featureChannels: param.output.tensorDim[1])
super.init(device: device, inFunctionName: "place_holder", initContext: initContext)
......
......@@ -25,11 +25,11 @@ int main() {
paddle_mobile.SetCLPath("/data/local/tmp/bin");
#endif
// auto isok =
// paddle_mobile.Load(std::string(g_mobilenet_mul) + "/model",
// std::string(g_mobilenet_mul) + "/params", true);
auto isok = paddle_mobile.Load(
std::string(g_mobilenet_vision) + "/vision_mobilenet_model",
std::string(g_mobilenet_vision) + "/vision_mobilenet_params", true);
auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
// auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
if (isok) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
......@@ -37,12 +37,13 @@ int main() {
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
GetInput<float>(g_test_image_1x3x224x224_vision_mobilenet_input, &input,
dims);
std::vector<float> vec_result = paddle_mobile.Predict(input, dims);
auto time3 = paddle_mobile::time();
int max = 10;
int max = 1;
for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile.Predict(input, dims);
}
......
......@@ -20,14 +20,18 @@ int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
std::string(g_mobilenet_combined) + "/params", true)) {
if (paddle_mobile.Load(
std::string(g_mobilenet_vision) + "/vision_mobilenet_model",
std::string(g_mobilenet_vision) + "/vision_mobilenet_params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
GetInput<float>(g_test_image_1x3x224x224_vision_mobilenet_input, &input,
dims);
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
......@@ -39,8 +43,9 @@ int main() {
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time3 = time();
for (int i = 0; i < 10; ++i) {
for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = time();
......
......@@ -23,15 +23,15 @@ int main() {
// ../../../test/models/mobilenet
auto time1 = time();
if (paddle_mobile.Load(std::string(g_yolo_combined) + "/model",
std::string(g_yolo_combined) + "/params", true)) {
if (paddle_mobile.Load(std::string(g_yolo_vision) + "/model",
std::string(g_yolo_vision) + "/params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<int64_t> dims{1, 3, 416, 416};
std::vector<float> input;
GetInput<float>(g_test_image_desktop_1_3_416_416_nchw_float, &input, dims);
GetInput<float>(g_test_image_1x3x416x416_vision_yolo_input, &input, dims);
std::cout << "input.size(): " << input.size() << std::endl;
for (int j = 0; j < 100; ++j) {
std::cout << j << " : " << input[j] << std::endl;
......@@ -42,13 +42,6 @@ int main() {
// }
auto time3 = time();
const vector<float> vector_out = paddle_mobile.Predict(input, dims);
std::cout << "--------------------------------------------" << std::endl;
for (float i : vector_out) {
std::cout << i << std::endl;
}
std::cout << "--------------------------------------------" << std::endl;
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <thread>
#include <thread> // NOLINT
#include "../../src/common/types.h"
#include "../../src/io/paddle_test_inference_api.h"
#include "../test_helper.h"
......@@ -31,8 +31,9 @@ void t1() {
paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
#endif
auto time1 = paddle_mobile::time();
auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model",
std::string(g_yolo_mul) + "/params", true);
auto isok =
paddle_mobile_gpu.Load(std::string(g_yolo_vision) + "/model",
std::string(g_yolo_vision) + "/params", true);
// auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
if (isok) {
......@@ -42,13 +43,13 @@ void t1() {
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 416, 416};
GetInput<float>(g_yolo_img, &input, dims);
GetInput<float>(g_test_image_1x3x416x416_vision_yolo_input, &input, dims);
std::vector<float> vec_result;
// = paddle_mobile.Predict(input, dims);
auto time3 = paddle_mobile::time();
int max = 10;
int max = 1;
for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile_gpu.Predict(input, dims);
}
......@@ -129,9 +130,9 @@ void t2() {
void t3() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
// paddle_mobile.SetThreadNum(4);
//#ifdef PADDLE_MOBILE_CL
// #ifdef PADDLE_MOBILE_CL
// paddle_mobile.SetCLPath("/data/local/tmp/bin");
//#endif
// #endif
auto time1 = paddle_mobile::time();
auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
std::string(g_yolo_mul) + "/params", true);
......
......@@ -51,6 +51,8 @@ static const char *g_yolo_combined = "../models/yolo_combined";
static const char *g_yolo_mul = "../models/d";
static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
static const char *g_vgg16_ssd_combined = "../models/vgg16_ssd_combined";
static const char *g_mobilenet_vision = "../models/vision_mobilenet";
static const char *g_yolo_vision = "../models/vision_yolo";
static const char *g_test_image_1x3x224x224 =
"../images/test_image_1x3x224x224_float";
static const char *g_test_image_1x3x224x224_banana =
......@@ -65,10 +67,14 @@ static const char *g_img = "../images/img.bin";
static const char *g_yolo_img = "../images/in_put_1_3_416_416_2";
static const char *g_super_img = "../images/mingren_input_data";
static const char *g_mobilenet_img = "../images/image";
static const char *g_test_image_1x3x224x224_vision_mobilenet_input =
"../images/vision_mobilenet_input";
static const char *g_test_image_1x3x416x416_vision_yolo_input =
"../images/yolo_input";
using paddle_mobile::framework::DDim;
using paddle_mobile::framework::Tensor;
using namespace paddle_mobile;
using namespace paddle_mobile; // NOLINT
template <typename T>
void SetupTensor(paddle_mobile::framework::Tensor *input,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册