提交 445317f8 编写于 作者: L liuruilong

Support super resolution; fix Metal pool op bug

上级 2da6bd2f
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C021D608DF00F2FD90 /* mobilenet_model */; }; FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C021D608DF00F2FD90 /* mobilenet_model */; };
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C121D608DF00F2FD90 /* mobilenet_params */; }; FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C121D608DF00F2FD90 /* mobilenet_params */; };
FC9797C721D609FB00F2FD90 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C621D609FB00F2FD90 /* synset.txt */; }; FC9797C721D609FB00F2FD90 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C621D609FB00F2FD90 /* synset.txt */; };
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC9797CE21D6506F00F2FD90 /* mingren.jpg */; };
FC9797D121D6616600F2FD90 /* BufferToTexture.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC9797D021D6616600F2FD90 /* BufferToTexture.metal */; };
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; }; FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; }; FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
...@@ -78,6 +80,8 @@ ...@@ -78,6 +80,8 @@
FC9797C021D608DF00F2FD90 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; }; FC9797C021D608DF00F2FD90 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; };
FC9797C121D608DF00F2FD90 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; }; FC9797C121D608DF00F2FD90 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; };
FC9797C621D609FB00F2FD90 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; }; FC9797C621D609FB00F2FD90 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FC9797CE21D6506F00F2FD90 /* mingren.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = mingren.jpg; sourceTree = "<group>"; };
FC9797D021D6616600F2FD90 /* BufferToTexture.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = BufferToTexture.metal; sourceTree = "<group>"; };
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; }; FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; }; FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; };
...@@ -158,6 +162,7 @@ ...@@ -158,6 +162,7 @@
FC203FA821CBFDBA00B37166 /* images */ = { FC203FA821CBFDBA00B37166 /* images */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC9797CE21D6506F00F2FD90 /* mingren.jpg */,
FC9797BD21D6045B00F2FD90 /* banana.jpeg */, FC9797BD21D6045B00F2FD90 /* banana.jpeg */,
FC203FA921CBFDBA00B37166 /* test.jpg */, FC203FA921CBFDBA00B37166 /* test.jpg */,
); );
...@@ -226,6 +231,7 @@ ...@@ -226,6 +231,7 @@
children = ( children = (
FC013927210204A3008100E3 /* PreProcessKernel.metal */, FC013927210204A3008100E3 /* PreProcessKernel.metal */,
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */, FCBCCC542122EF5400D94F7E /* MetalHelper.swift */,
FC9797D021D6616600F2FD90 /* BufferToTexture.metal */,
); );
path = Net; path = Net;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -303,6 +309,7 @@ ...@@ -303,6 +309,7 @@
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */, FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */,
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */, FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */,
FC704C1921D2375300F98BAB /* super_params in Resources */, FC704C1921D2375300F98BAB /* super_params in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */, FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
...@@ -373,6 +380,7 @@ ...@@ -373,6 +380,7 @@
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */, FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */,
C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */, C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */,
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */, FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
FC9797D121D6616600F2FD90 /* BufferToTexture.metal in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
......
//
// BufferToTexture.metal
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/12/28.
// Copyright © 2018 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
// Copies one float per texel from `input` into the red channel of `outTexture`.
// The buffer is indexed row by row (index = width * y + x); the remaining three
// channels of every written texel are set to zero.
// Threads whose position lies outside the texture return without writing.
kernel void buffer_to_texture_kernel(const device float *input [[buffer(0)]],
                                     texture2d<float, access::write> outTexture [[texture(0)]],
                                     uint2 gid [[thread_position_in_grid]]) {
    const uint width = outTexture.get_width();
    const uint height = outTexture.get_height();

    if (gid.x < width && gid.y < height) {
        const uint index = width * gid.y + gid.x;
        const float value = input[index];
        outTexture.write(float4(value, 0.0f, 0.0f, 0.0f), gid);
    }
}
// Half-precision-texture variant of the buffer-to-texture copy.
// Reads one float per texel from `input` (index = width * y + x), converts it
// to half while building the texel, and stores it in the red channel of
// `outTexture`; the other three channels are written as zero.
// Threads whose position lies outside the texture return without writing.
kernel void buffer_to_texture_kernel_half(const device float *input [[buffer(0)]],
                                          texture2d<half, access::write> outTexture [[texture(0)]],
                                          uint2 gid [[thread_position_in_grid]]) {
    const uint width = outTexture.get_width();
    const uint height = outTexture.get_height();

    if (gid.x < width && gid.y < height) {
        const uint index = width * gid.y + gid.x;
        const float value = input[index];
        outTexture.write(half4(value, 0.0f, 0.0f, 0.0f), gid);
    }
}
...@@ -112,6 +112,7 @@ ...@@ -112,6 +112,7 @@
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; }; FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; };
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; }; FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; };
FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; }; FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; };
FCCED5E121D71FC000BE8D5F /* PoolKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */; };
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; }; FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; };
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; }; FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; };
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; }; FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; };
...@@ -254,6 +255,7 @@ ...@@ -254,6 +255,7 @@
FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = "<group>"; }; FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = "<group>"; };
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = "<group>"; }; FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = "<group>"; };
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = "<group>"; }; FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = "<group>"; };
FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PoolKernel.inc.metal; sourceTree = "<group>"; };
FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; }; FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; };
FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; }; FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; };
FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; }; FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; };
...@@ -535,6 +537,7 @@ ...@@ -535,6 +537,7 @@
FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */, FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */,
FC0226552138F33800F395E2 /* TransposeKernel.metal */, FC0226552138F33800F395E2 /* TransposeKernel.metal */,
4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */, 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */,
FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */,
FC0226572138F38D00F395E2 /* PoolKernel.metal */, FC0226572138F38D00F395E2 /* PoolKernel.metal */,
FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */, FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */,
FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */, FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */,
...@@ -707,6 +710,7 @@ ...@@ -707,6 +710,7 @@
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */, FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */, FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */,
FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */, FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
FCCED5E121D71FC000BE8D5F /* PoolKernel.inc.metal in Sources */,
FC9D038420E23B01000F735A /* Texture.swift in Sources */, FC9D038420E23B01000F735A /* Texture.swift in Sources */,
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */, FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */,
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */, 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
......
...@@ -41,7 +41,7 @@ public class Genet: Net { ...@@ -41,7 +41,7 @@ public class Genet: Net {
class GenetPreProccess: CusomKernel { class GenetPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3) let s = Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false)
} }
} }
......
...@@ -18,7 +18,7 @@ public class MobileNet: Net{ ...@@ -18,7 +18,7 @@ public class MobileNet: Net{
class MobilenetPreProccess: CusomKernel { class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, usePaddleMobileLib: false)
} }
} }
......
...@@ -17,11 +17,11 @@ public class MobileNetCombined: Net { ...@@ -17,11 +17,11 @@ public class MobileNetCombined: Net {
modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
modelDir = "" modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device) inputDim_ = Dim.init(inDim: [1, 224, 224, 3])
inputDim_ = Dim.init(inDim: [1, 416, 416, 3])
} }
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { @objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [0, 0, 0] means = [0, 0, 0]
scale = 1 scale = 1
...@@ -29,9 +29,7 @@ public class MobileNetCombined: Net { ...@@ -29,9 +29,7 @@ public class MobileNetCombined: Net {
modelPath = "" modelPath = ""
paramPath = "" paramPath = ""
modelDir = "" modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device) inputDim_ = Dim.init(inDim: [1, 224, 224, 3])
inputDim_ = Dim.init(inDim: [1, 416, 416, 3])
} }
// class GenetPreProccess: CusomKernel { // class GenetPreProccess: CusomKernel {
......
...@@ -41,7 +41,7 @@ public class MobileNet_ssd_hand: Net{ ...@@ -41,7 +41,7 @@ public class MobileNet_ssd_hand: Net{
class MobilenetssdPreProccess: CusomKernel { class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3) let s = Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false)
} }
} }
......
...@@ -41,7 +41,7 @@ public class MobileNet_ssd_AR: Net{ ...@@ -41,7 +41,7 @@ public class MobileNet_ssd_AR: Net{
class MobilenetssdPreProccess: CusomKernel { class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 160, inHeight: 160, inChannel: 3) let s = Shape.init(inWidth: 160, inHeight: 160, inChannel: 3)
super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, usePaddleMobileLib: false)
} }
} }
......
...@@ -14,6 +14,14 @@ ...@@ -14,6 +14,14 @@
import Foundation import Foundation
/// Pre-processing kernel for the super-resolution network.
///
/// Binds the `super_resolution_preprocess` shader function and requests a
/// 224 x 224 output shape with 3 channels. `usePaddleMobileLib` is passed as
/// `false`.
class SuperResolutionPreProccess: CusomKernel {
    init(device: MTLDevice) {
        super.init(device: device,
                   inFunctionName: "super_resolution_preprocess",
                   outputDim: Shape(inWidth: 224, inHeight: 224, inChannel: 3),
                   usePaddleMobileLib: false)
    }
}
public class SuperResolutionNet: Net{ public class SuperResolutionNet: Net{
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: ResultHolder) -> String {
return "未实现" return "未实现"
......
...@@ -18,7 +18,7 @@ public class YoloNet: Net { ...@@ -18,7 +18,7 @@ public class YoloNet: Net {
modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null"
modelDir = "" modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device) // preprocessKernel = GenetPreProccess.init(device: device)
inputDim_ = Dim.init(inDim: [1, 416, 416, 3]) inputDim_ = Dim.init(inDim: [1, 416, 416, 3])
} }
......
...@@ -46,17 +46,60 @@ open class Kernel { ...@@ -46,17 +46,60 @@ open class Kernel {
} }
} }
open class CusomKernel: Kernel { public struct Shape {
public struct Shape { public let width: Int
public let width: Int public let height: Int
public let height: Int public let channel: Int
public let channel: Int public init(inWidth: Int, inHeight: Int, inChannel: Int){
public init(inWidth: Int, inHeight: Int, inChannel: Int){ width = inWidth
width = inWidth height = inHeight
height = inHeight channel = inChannel
channel = inChannel }
}
/// Kernel wrapper that copies the contents of an `MTLBuffer` into a texture
/// it allocates itself.
open class BufferToTextureKernel: Kernel {
    /// Destination texture, created in `init` and written by `compute`.
    public let outputTexture: MTLTexture

    /// Allocates the output texture and selects the shader function that
    /// matches the current `computePrecision`.
    ///
    /// - Parameters:
    ///   - device: Metal device used to create the texture.
    ///   - outputDim: Width, height and channel count of the output.
    ///   - usePaddleMobileLib: Forwarded unchanged to `Kernel.init`.
    public init(device: MTLDevice, outputDim: Shape, usePaddleMobileLib: Bool = false) {
        // Pixel format and shader function are picked together so they cannot
        // drift apart; any other precision is a programming error.
        let pixelFormat: MTLPixelFormat
        let functionName: String
        if computePrecision == .Float16 {
            pixelFormat = .rgba16Float
            functionName = "buffer_to_texture_kernel_half"
        } else if computePrecision == .Float32 {
            pixelFormat = .rgba32Float
            functionName = "buffer_to_texture_kernel"
        } else {
            fatalError()
        }

        let descriptor = MTLTextureDescriptor()
        descriptor.textureType = .type2D
        descriptor.width = outputDim.width
        descriptor.height = outputDim.height
        // Four channels are packed per RGBA texel, rounded up.
        // NOTE(review): a depth above 1 on a .type2D descriptor looks suspicious — confirm intended texture type.
        descriptor.depth = (outputDim.channel + 3) / 4
        descriptor.pixelFormat = pixelFormat
        descriptor.usage = [.shaderRead, .shaderWrite]
        descriptor.storageMode = .shared

        outputTexture = device.makeTexture(descriptor: descriptor) ?! " make texture error "
        super.init(device: device, inFunctionName: functionName, usePaddleMobileLib: usePaddleMobileLib)
    }

    /// Encodes the buffer-to-texture copy into `commandBuffer`.
    ///
    /// - Parameters:
    ///   - inputBuffer: Source buffer bound at buffer index 0.
    ///   - commandBuffer: Command buffer that receives the compute pass.
    /// - Throws: `PaddleMobileError.predictError` when no compute encoder
    ///   can be created.
    public func compute(inputBuffer: MTLBuffer , commandBuffer: MTLCommandBuffer) throws {
        let possibleEncoder = commandBuffer.makeComputeCommandEncoder()
        guard let computeEncoder = possibleEncoder else {
            throw PaddleMobileError.predictError(message: " encode is nil")
        }

        computeEncoder.setBuffer(inputBuffer, offset: 0, index: 0)
        computeEncoder.setTexture(outputTexture, index: 0)
        computeEncoder.dispatch(computePipline: pipline, outTexture: outputTexture)
        computeEncoder.endEncoding()
    }
}
open class CusomKernel: Kernel {
public let outputTexture: MTLTexture public let outputTexture: MTLTexture
public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) { public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) {
let textureDesc = MTLTextureDescriptor.init() let textureDesc = MTLTextureDescriptor.init()
......
...@@ -49,7 +49,7 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -49,7 +49,7 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
) )
if computePrecision == .Float32 { if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "pool") super.init(device: device, inFunctionName: "pool_float")
} else if computePrecision == .Float16 { } else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "pool_half") super.init(device: device, inFunctionName: "pool_half")
} else { } else {
......
...@@ -24,6 +24,6 @@ using namespace metal; ...@@ -24,6 +24,6 @@ using namespace metal;
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p) #define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n) #define VECTOR(p, n) CONCAT2(p, n)
#define FUNC2_(a, b) CONCAT2_(a, b)
#define FUNC3_(a, b, c) CONCAT3_(a, b, c) #define FUNC3_(a, b, c) CONCAT3_(a, b, c)
//
// PoolKernel.inc.metal
// paddle-mobile
//
// Created by liuRuiLong on 2018/12/29.
// Copyright © 2018 orange. All rights reserved.
//
#ifdef P
// Pooling kernel template. `P` is the element type supplied by the including
// file (which defines P and then includes this file), and FUNC2_(pool, P)
// builds the per-precision kernel name from it.
//
// Each thread produces one output texel (gid.xy) of one array slice (gid.z):
//   poolType == 0 : component-wise maximum over the pooling window
//   poolType == 1 : average over the part of the window that lies inside the
//                   input texture
//   anything else : zeros are written
//
// The vertical window uses the Y fields of PoolParam. Previously the X fields
// were reused for the Y axis, which is only correct for square kernels with
// equal stride and padding on both axes.
// NOTE(review): assumes PoolParam declares strideY / paddingY next to ksizeY —
// confirm against the struct definition in PoolKernel.metal.
kernel void FUNC2_(pool, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
                            texture2d_array<P, access::write> outTexture [[texture(1)]],
                            constant PoolParam &pm [[buffer(0)]],
                            uint3 gid [[thread_position_in_grid]]) {
    // Threads outside the output extent have nothing to write.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    // Horizontal window, clipped to the input texture.
    int xmin = gid.x * pm.strideX - pm.paddingX;
    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
    xmin = max(xmin, 0);

    // Vertical window, clipped to the input texture.
    int ymin = gid.y * pm.strideY - pm.paddingY;
    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
    ymin = max(ymin, 0);

    VECTOR(P, 4) r = 0;
    if (pm.poolType == 0) {
        // Seed with the first texel so negative inputs are handled correctly.
        r = inTexture.read(uint2(xmin, ymin), gid.z);
        for (int x = xmin; x < xmax; x++) {
            for (int y = ymin; y < ymax; y++) {
                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
            }
        }
    } else if (pm.poolType == 1) {
        for (int x = xmin; x < xmax; x++) {
            for (int y = ymin; y < ymax; y++) {
                r += inTexture.read(uint2(x, y), gid.z);
            }
        }
        // Divide by the number of texels actually summed (clipped window).
        r /= (xmax - xmin) * (ymax - ymin);
    }
    outTexture.write(r, gid.xy, gid.z);
}
#endif
...@@ -13,7 +13,8 @@ ...@@ -13,7 +13,8 @@
limitations under the License. */ limitations under the License. */
#include <metal_stdlib> #include <metal_stdlib>
#include "Common.metal" #include "Macro.metal"
using namespace metal; using namespace metal;
struct PoolParam { struct PoolParam {
...@@ -26,68 +27,10 @@ struct PoolParam { ...@@ -26,68 +27,10 @@ struct PoolParam {
int poolType; int poolType;
}; };
kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]], #define P float
texture2d_array<float, access::write> outTexture [[texture(1)]], #import "PoolKernel.inc.metal"
constant PoolParam &pm [[buffer(0)]], #undef P
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
float4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]], #define P half
texture2d_array<half, access::write> outTexture [[texture(1)]], #import "PoolKernel.inc.metal"
constant PoolParam &pm [[buffer(0)]], #undef P
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
half4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
...@@ -51,7 +51,10 @@ class ScaleKernel: CusomKernel { ...@@ -51,7 +51,10 @@ class ScaleKernel: CusomKernel {
} }
numel = net.inputDim.numel() numel = net.inputDim.numel()
meansNumber = net.means.map { NSNumber.init(value: $0) } meansNumber = net.means.map {
NSNumber.init(value: $0)
}
dimsNum = [NSNumber.init(value: net.inputDim[0]), dimsNum = [NSNumber.init(value: net.inputDim[0]),
NSNumber.init(value: net.inputDim[3]), NSNumber.init(value: net.inputDim[3]),
NSNumber.init(value: net.inputDim[1]), NSNumber.init(value: net.inputDim[1]),
...@@ -119,6 +122,29 @@ class ScaleKernel: CusomKernel { ...@@ -119,6 +122,29 @@ class ScaleKernel: CusomKernel {
scaleTexture(input: texture!, complete: getTexture) scaleTexture(input: texture!, complete: getTexture)
} }
/// Converts `inBuffer` into a texture and hands it to `getTexture`.
///
/// A `BufferToTextureKernel` is created with width `net.inputDim[2]`, height
/// `net.inputDim[1]` and channel count `net.inputDim[3]`, its work is encoded
/// into a new command buffer, and the callback is invoked with the kernel's
/// output texture from the command buffer's completed handler.
///
/// Terminates with `fatalError` when the queue or device is missing, when no
/// command buffer can be created, or when encoding throws.
///
/// - Parameters:
///   - inBuffer: Source buffer holding the input values.
///   - getTexture: Callback that receives the resulting texture.
@objc public func getTexture(inBuffer: MTLBuffer, getTexture: @escaping (MTLTexture) -> Void) {
    guard let commandQueue = queue, let metalDevice = device else {
        fatalError( " queue or devcie nil " )
    }

    guard let commandBuffer = commandQueue.makeCommandBuffer() else {
        fatalError( " make buffer error" )
    }

    let outputShape = Shape.init(inWidth: net.inputDim[2],
                                 inHeight: net.inputDim[1],
                                 inChannel: net.inputDim[3])
    let converter = BufferToTextureKernel.init(device: metalDevice, outputDim: outputShape)

    do {
        try converter.compute(inputBuffer: inBuffer, commandBuffer: commandBuffer)
    } catch {
        fatalError(" bufferToTextureKernel error ")
    }

    // The closure captures `converter`, so it is still referenced when the handler runs.
    commandBuffer.addCompletedHandler { _ in
        getTexture(converter.outputTexture)
    }

    commandBuffer.commit()
}
public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) { public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) {
guard let inQueue = queue, let inDevice = device else { guard let inQueue = queue, let inDevice = device else {
...@@ -129,7 +155,7 @@ class ScaleKernel: CusomKernel { ...@@ -129,7 +155,7 @@ class ScaleKernel: CusomKernel {
fatalError( " make buffer error" ) fatalError( " make buffer error" )
} }
let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.inputDim[2], inHeight: net.inputDim[1], inChannel: 3)) let scaleKernel = ScaleKernel.init(device: inDevice, shape: Shape.init(inWidth: net.inputDim[2], inHeight: net.inputDim[1], inChannel: 3))
do { do {
try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer) try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer)
......
...@@ -25,8 +25,7 @@ public class GPUResultHolder { ...@@ -25,8 +25,7 @@ public class GPUResultHolder {
public let capacity: Int public let capacity: Int
public var resultPointer: UnsafeMutablePointer<Float32>? public var resultPointer: UnsafeMutablePointer<Float32>?
public var intermediateResults: [String : [Variant]]? public var intermediateResults: [String : [Variant]]?
public let elapsedTime: Double public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inIntermediateResults: [String : [Variant]]? = nil) {
public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) {
dim = inDim dim = inDim
capacity = inCapacity capacity = inCapacity
...@@ -35,7 +34,6 @@ public class GPUResultHolder { ...@@ -35,7 +34,6 @@ public class GPUResultHolder {
resultPointer?.initialize(from: inInPointer, count: inCapacity) resultPointer?.initialize(from: inInPointer, count: inCapacity)
} }
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults intermediateResults = inIntermediateResults
} }
...@@ -124,7 +122,6 @@ public class Executor<P: PrecisionType> { ...@@ -124,7 +122,6 @@ public class Executor<P: PrecisionType> {
resInput = input resInput = input
} }
let beforeDate = Date.init()
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: dim) let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: dim)
program.scope.setInput(input: inputTexture) program.scope.setInput(input: inputTexture)
//(ops.count - except) //(ops.count - except)
...@@ -150,28 +147,28 @@ public class Executor<P: PrecisionType> { ...@@ -150,28 +147,28 @@ public class Executor<P: PrecisionType> {
//将输入写进文件 //将输入写进文件
/* /*
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(dim) print(dim)
writeToLibrary(fileName: "test_image_yolo", array: inputArr) writeToLibrary(fileName: "test_image_mingren", array: inputArr)
print(" write done ") print(" write done ")
return return
*/ */
/* 输出 op 计算结果 /* 输出 op 计算结果
for op in SSelf.ops { for op in SSelf.ops {
op.delogOutput() op.delogOutput()
} }
*/ */
let afterDate = Date.init()
var resultHolder: GPUResultHolder var resultHolder: GPUResultHolder
if except > 0 { if except > 0 {
resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures) resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inIntermediateResults: outputTextures)
} else { } else {
let outputVar: Variant = SSelf.program.scope.output()! let outputVar: Variant = SSelf.program.scope.output()!
let output: FetchHolder = outputVar as! FetchHolder let output: FetchHolder = outputVar as! FetchHolder
resultHolder = GPUResultHolder.init(inDim: output.dim.dims, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate)) resultHolder = GPUResultHolder.init(inDim: output.dim.dims, inPointer: output.result, inCapacity: output.capacity)
} }
completionHandle(resultHolder) completionHandle(resultHolder)
......
...@@ -96,8 +96,6 @@ public class Texture: Tensorial { ...@@ -96,8 +96,6 @@ public class Texture: Tensorial {
return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
} }
func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) { func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) {
transpose = inTranspose transpose = inTranspose
for i in 0..<(4 - tensorDim.cout()) { for i in 0..<(4 - tensorDim.cout()) {
......
...@@ -507,7 +507,7 @@ void Executor<Device, T>::Predict_To(int end) { ...@@ -507,7 +507,7 @@ void Executor<Device, T>::Predict_To(int end) {
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
template <> template <>
void Executor<GPU_CL, float>::InitNoPersistableMemory( void Executor<GPU_CL, float>::InitNoPersistableMemory(
const LoDTensor &input_tensor) { const Tensor &input_tensor) {
DLOG << "CL InitNoPersistableMemory "; DLOG << "CL InitNoPersistableMemory ";
for (const auto &block : program_desc_->Blocks()) { for (const auto &block : program_desc_->Blocks()) {
for (const auto &var_desc : block->Vars()) { for (const auto &var_desc : block->Vars()) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册