提交 ec42180c 编写于 作者: L liuruilong

align result

上级 10944966
......@@ -16,6 +16,7 @@
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; };
FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC918190211DBC3500B6F354 /* paddle-mobile.png */; };
FC918193211DC70500B6F354 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC918192211DC70500B6F354 /* iphone.JPG */; };
FCA3A16121313E1F00084FE5 /* hand.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FCA3A16021313E1F00084FE5 /* hand.jpg */; };
FCBCCC522122EEDC00D94F7E /* ssd_hand_params in Resources */ = {isa = PBXBuildFile; fileRef = FCBCCC502122EEDC00D94F7E /* ssd_hand_params */; };
FCBCCC532122EEDC00D94F7E /* ssd_hand_model in Resources */ = {isa = PBXBuildFile; fileRef = FCBCCC512122EEDC00D94F7E /* ssd_hand_model */; };
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
......@@ -56,6 +57,7 @@
FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
FC918190211DBC3500B6F354 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; };
FC918192211DC70500B6F354 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; };
FCA3A16021313E1F00084FE5 /* hand.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = hand.jpg; sourceTree = "<group>"; };
FCBCCC502122EEDC00D94F7E /* ssd_hand_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = ssd_hand_params; sourceTree = "<group>"; };
FCBCCC512122EEDC00D94F7E /* ssd_hand_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = ssd_hand_model; sourceTree = "<group>"; };
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
......@@ -137,6 +139,7 @@
FC0E2C1D20EDC030009C1FAC /* images */ = {
isa = PBXGroup;
children = (
FCA3A16021313E1F00084FE5 /* hand.jpg */,
FC918192211DC70500B6F354 /* iphone.JPG */,
FC918190211DBC3500B6F354 /* paddle-mobile.png */,
FCDFD41A211D91C7005AB38B /* synset.txt */,
......@@ -245,6 +248,7 @@
FCDFD41B211D91C7005AB38B /* synset.txt in Resources */,
FCD04E6420F3146B0007374F /* model in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
FCA3A16121313E1F00084FE5 /* hand.jpg in Resources */,
FCBCCC532122EEDC00D94F7E /* ssd_hand_model in Resources */,
runOnlyForDeploymentPostprocessing = 0;
......@@ -19,10 +19,10 @@
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="ZZh-fw-LwK">
<imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" ambiguous="YES" image="hand.jpg" translatesAutoresizingMaskIntoConstraints="NO" id="ZZh-fw-LwK">
<rect key="frame" x="0.0" y="20" width="375" height="247"/>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Thread:" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="2EB-m2-a3L">
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" ambiguous="YES" text="Thread:" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="2EB-m2-a3L">
<rect key="frame" x="10" y="538" width="68" height="24"/>
<constraint firstAttribute="width" constant="68" id="Q5J-tq-JSX"/>
......@@ -32,19 +32,19 @@
<nil key="textColor"/>
<nil key="highlightedColor"/>
<pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr">
<pickerView contentMode="scaleToFill" ambiguous="YES" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr">
<rect key="frame" x="88" y="510.5" width="287" height="80"/>
<constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/>
<pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="6MG-gv-hD5">
<pickerView contentMode="scaleToFill" ambiguous="YES" translatesAutoresizingMaskIntoConstraints="NO" id="6MG-gv-hD5">
<rect key="frame" x="85" y="401" width="290" height="80"/>
<constraint firstAttribute="height" constant="80" id="yAL-JY-G6b"/>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Models" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="avL-VK-Kha">
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" ambiguous="YES" text="Models" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="avL-VK-Kha">
<rect key="frame" x="10" y="429" width="65" height="24"/>
<constraint firstAttribute="width" constant="65" id="6oA-g2-Xq4"/>
......@@ -54,7 +54,7 @@
<nil key="textColor"/>
<nil key="highlightedColor"/>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="wUL-9N-u1V">
<button opaque="NO" contentMode="scaleToFill" ambiguous="YES" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="wUL-9N-u1V">
<rect key="frame" x="16" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Image">
......@@ -64,7 +64,7 @@
<action selector="selectImageAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="5uR-SM-fKO"/>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="XpL-9M-UOp">
<button opaque="NO" contentMode="scaleToFill" ambiguous="YES" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="XpL-9M-UOp">
<rect key="frame" x="109.5" y="597" width="63" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Load">
......@@ -74,7 +74,7 @@
<action selector="loadAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="fZ5-CQ-jCY"/>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="R90-Yf-S6g">
<button opaque="NO" contentMode="scaleToFill" ambiguous="YES" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="R90-Yf-S6g">
<rect key="frame" x="202.5" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Predict">
......@@ -84,7 +84,7 @@
<action selector="predictAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="Iyy-sY-gt4"/>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="a3K-ri-NVs">
<button opaque="NO" contentMode="scaleToFill" ambiguous="YES" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="a3K-ri-NVs">
<rect key="frame" x="296" y="597" width="63" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Clear">
......@@ -94,7 +94,7 @@
<action selector="clearAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="JYf-UX-rCR"/>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="w7H-Sk-Rai">
<view contentMode="scaleToFill" ambiguous="YES" translatesAutoresizingMaskIntoConstraints="NO" id="w7H-Sk-Rai">
<rect key="frame" x="79.5" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
......@@ -102,7 +102,7 @@
<constraint firstAttribute="width" constant="30" id="vYd-Fc-KAj"/>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="T4O-nx-ciH">
<view contentMode="scaleToFill" ambiguous="YES" translatesAutoresizingMaskIntoConstraints="NO" id="T4O-nx-ciH">
<rect key="frame" x="266" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
......@@ -110,7 +110,7 @@
<constraint firstAttribute="width" constant="30" id="fXE-S7-ZXL"/>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="976-fk-Kx2">
<view contentMode="scaleToFill" ambiguous="YES" translatesAutoresizingMaskIntoConstraints="NO" id="976-fk-Kx2">
<rect key="frame" x="172.5" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
......@@ -118,7 +118,7 @@
<constraint firstAttribute="width" constant="30" id="L4p-hP-s5C"/>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="耗时:" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="m5L-O7-P31">
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" ambiguous="YES" text="耗时:" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="m5L-O7-P31">
<rect key="frame" x="15" y="277" width="350" height="38"/>
<constraint firstAttribute="height" constant="38" id="6SS-sb-7I2"/>
......@@ -133,7 +133,7 @@
<constraint firstAttribute="width" secondItem="4ey-Xr-U4e" secondAttribute="height" multiplier="6.5:1" id="8c5-FF-lB9"/>
<textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" editable="NO" text="结果:" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="VQn-bS-fWp">
<textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" ambiguous="YES" editable="NO" text="结果:" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="VQn-bS-fWp">
<rect key="frame" x="10" y="323" width="355" height="70"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
......@@ -203,6 +203,7 @@
<image name="hand.jpg" width="564" height="664"/>
<image name="paddle-mobile.png" width="402" height="62"/>
......@@ -30,6 +30,7 @@ protocol Net {
var preprocessKernel: CusomKernel { get }
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void)
func resultStr(res: [Float]) -> String
func fetchResult(paddleMobileRes: ResultHolder<Float32>) -> [Float32]
extension Net {
......@@ -39,10 +40,13 @@ extension Net {
func fetchResult(paddleMobileRes: ResultHolder<Float32>) -> [Float32] {
return paddleMobileRes.resultArr
struct MobileNet: Net{
class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
......@@ -100,7 +104,8 @@ struct MobileNet_ssd_hand: Net{
func resultStr(res: [Float]) -> String {
return "哈哈哈, 还没好"
// fatalError()
func bboxArea(box: [Float32], normalized: Bool) -> Float32 {
......@@ -117,7 +122,6 @@ struct MobileNet_ssd_hand: Net{
func jaccardOverLap(box1: [Float32], box2: [Float32], normalized: Bool) -> Float32 {
if box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1] {
......@@ -136,9 +140,11 @@ struct MobileNet_ssd_hand: Net{
func fetchResult(paddleMobileRes: [String : Texture<Float32>]) -> [Float32]{
let bbox = paddleMobileRes["box_coder_0.tmp_0"] ?! " no bbox "
let scores = paddleMobileRes["transpose_12.tmp_0"] ?! " no scores "
func fetchResult(paddleMobileRes: ResultHolder<Float32>) -> [Float32]{
let scores = paddleMobileRes.intermediateResults![0] as! Texture<Float32>
let bbox = paddleMobileRes.intermediateResults![1] as! Texture<Float32>
// let bbox = paddleMobileRes["box_coder_0.tmp_0"] ?! " no bbox "
// let scores = paddleMobileRes["transpose_12.tmp_0"] ?! " no scores "
let score_thredshold: Float32 = 0.01
let nms_top_k = 400
let keep_top_k = 200
......@@ -156,20 +162,29 @@ struct MobileNet_ssd_hand: Net{
var scoreFormatArr: [Float32] = []
var outputArr: [Float32] = []
let numOfOneC = (scores.originDim[2] + 3) / 4 // 480
let cNumOfOneClass = numOfOneC * 4 // 1920
let numOfOneC = (scores.tensorDim[2] + 3) / 4 // 480
let boxSize = bbox.originDim[2] // 4
let classNum = scores.originDim[1] // 7
let cNumOfOneClass = scores.tensorDim[2] // 1917
let cPaddedNumOfOneClass = numOfOneC * 4 // 1920
let boxSize = bbox.tensorDim[2] // 4
let classNum = scores.tensorDim[1] // 7
let classNumOneTexture = classNum * 4 // 28
for c in 0..<classNum {
for n in 0..<numOfOneC {
let to = n * classNumOneTexture + c * 4
scoreFormatArr.append(scoresArr[to + 1])
scoreFormatArr.append(scoresArr[to + 2])
scoreFormatArr.append(scoresArr[to + 3])
if n == numOfOneC - 1 {
for i in 0..<(4 - (cPaddedNumOfOneClass - cNumOfOneClass)) {
scoreFormatArr.append(scoresArr[to + i])
} else {
scoreFormatArr.append(scoresArr[to + 1])
scoreFormatArr.append(scoresArr[to + 2])
scoreFormatArr.append(scoresArr[to + 3])
......@@ -178,13 +193,13 @@ struct MobileNet_ssd_hand: Net{
var numDet: Int = 0
for i in 0..<classNum {
var sliceScore = scoreFormatArr[(i * cNumOfOneClass)..<((i + 1) * cNumOfOneClass)]
var sliceScore = Array<Float32>(scoreFormatArr[(i * cNumOfOneClass)..<((i + 1) * cNumOfOneClass)])
var scoreThresholdArr: [(Float32, Int)] = []
for i in 0..<cNumOfOneClass {
if sliceScore[i] > score_thredshold {
scoreThresholdArr.append((sliceScore[i], i))
for j in 0..<cNumOfOneClass {
if sliceScore[j] > score_thredshold {
scoreThresholdArr.append((sliceScore[j], j))
......@@ -204,7 +219,7 @@ struct MobileNet_ssd_hand: Net{
if keep {
let keptIdx = selectedIndex[j].0
let box1 = Array<Float32>(bboxArr[(idx * boxSize)..<(idx * boxSize + 4)])
let box2 = Array<Float32>(bboxArr[(idx * boxSize)..<(keptIdx * boxSize + 4)])
let box2 = Array<Float32>(bboxArr[(keptIdx * boxSize)..<(keptIdx * boxSize + 4)])
let overlap = jaccardOverLap(box1: box1, box2: box2, normalized: true)
keep = (overlap <= nms_threshold)
......@@ -259,7 +274,8 @@ struct MobileNet_ssd_hand: Net{
outputArr.append(contentsOf: subBox)
print(" fuck success !")
return outputArr
......@@ -75,7 +75,7 @@ class ViewController: UIViewController {
do {
let max = 10
let max = 1
var startDate = Date.init()
for i in 0..<max {
try inExecutor.predict(input: inTexture, expect: modelHelper.dim, completionHandle: { [weak self] (result) in
......@@ -87,14 +87,16 @@ class ViewController: UIViewController {
startDate = Date.init()
let resultArr = sSelf.modelHelper.fetchResult(paddleMobileRes: result)
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
sSelf.resultTextView.text = sSelf.modelHelper.resultStr(res: result.resultArr)
sSelf.resultTextView.text = sSelf.modelHelper.resultStr(res: resultArr)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max/2) * 1000.0) ms"
}, preProcessKernle: self.modelHelper.preprocessKernel)
}, preProcessKernle: self.modelHelper.preprocessKernel, except: 2)
} catch let error {
......@@ -108,7 +110,7 @@ class ViewController: UIViewController {
threadPickerView.delegate = self
threadPickerView.dataSource = self
selectImage = UIImage.init(named: "banana.jpeg")
selectImage = UIImage.init(named: "hand.jpg")
selectImageView.image = selectImage
modelHelper.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
self?.toPredictTexture = texture
......@@ -46,6 +46,8 @@
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; };
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; };
FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; };
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */; };
FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1642132A5EB00084FE5 /* Common.metal */; };
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
......@@ -126,6 +128,8 @@
FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; };
FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; };
FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = "<group>"; };
FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReshapeKernel.metal; sourceTree = "<group>"; };
FCA3A1642132A5EB00084FE5 /* Common.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Common.metal; sourceTree = "<group>"; };
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
......@@ -349,6 +353,8 @@
FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */,
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */,
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */,
FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */,
FCA3A1642132A5EB00084FE5 /* Common.metal */,
path = metal;
sourceTree = "<group>";
......@@ -482,6 +488,7 @@
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */,
FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
FC9D038420E23B01000F735A /* Texture.swift in Sources */,
......@@ -503,6 +510,7 @@
FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */,
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */,
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */,
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
......@@ -16,95 +16,110 @@ import Foundation
// 自定义 ?! 如果 ?! 前的返回值为一个可选值, 则进行隐式解包, 如果有值则返回这个值, 如果为nil 则fatalError 传入的信息
precedencegroup ExecutedOrFatalError{
associativity: left
higherThan: AssignmentPrecedence
associativity: left
higherThan: AssignmentPrecedence
infix operator ?!: ExecutedOrFatalError
public func ?!<T>(option: T?, excuteOrError: @autoclosure () -> String) -> T{
if let inOpt = option {
return inOpt
if let inOpt = option {
return inOpt
struct Lense<A, B> {
let from: (A) -> B
let to: (B, A) -> A
let from: (A) -> B
let to: (B, A) -> A
precedencegroup CombineLense{
associativity: left
higherThan: AssignmentPrecedence
associativity: left
higherThan: AssignmentPrecedence
infix operator >>>: CombineLense
func >>><A, B, C>(left: Lense<B, C>, right: Lense<A, B>) -> Lense<A, C> {
return Lense<A, C>.init(from: { (a) -> C in
}, to: { (c, a) -> A in
right.to( left.to(c, right.from(a)),a)
return Lense<A, C>.init(from: { (a) -> C in
}, to: { (c, a) -> A in
right.to( left.to(c, right.from(a)),a)
protocol CIntIndex {
associatedtype T;
subscript(index: CInt) -> T { get set};
associatedtype T;
subscript(index: CInt) -> T { get set};
extension Array: CIntIndex{
typealias T = Element
subscript(index: CInt) -> T {
guard Int64(Int.max) >= Int64(index) else{
fatalError("cint index out of Int range")
return self[Int(index)]
guard Int64(Int.max) >= Int64(index) else{
fatalError("cint index out of Int range")
self[Int(index)] = newValue
typealias T = Element
subscript(index: CInt) -> T {
guard Int64(Int.max) >= Int64(index) else{
fatalError("cint index out of Int range")
return self[Int(index)]
guard Int64(Int.max) >= Int64(index) else{
fatalError("cint index out of Int range")
self[Int(index)] = newValue
extension Array where Element: AnyObject{
mutating func remove(element: Element) {
if let index = index(where: { (node) -> Bool in
return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self)
}) {
remove(at: index)
mutating func remove(element: Element) {
if let index = index(where: { (node) -> Bool in
return unsafeBitCast(element, to: Int.self) == unsafeBitCast(node, to: Int.self)
}) {
remove(at: index)
//MARK: Array extension
extension Array where Element: Comparable{
/// 返回数组前 r 个元素, 并将元素处于原数组的位置作为元组的第一个元素返回
/// - Parameter r: 前 r 个元素
/// - Returns: [(原有位置, 排好位置的元素)]
public func top(r: Int) -> [(Int, Element)] {
precondition(r <= self.count)
return Array<(Int, Element)>(zip(0..<self.count, self).sorted{ $0.1 > $1.1 }.prefix(through: r - 1))
/// 返回数组前 r 个元素, 并将元素处于原数组的位置作为元组的第一个元素返回
/// - Parameter r: 前 r 个元素
/// - Returns: [(原有位置, 排好位置的元素)]
public func top(r: Int) -> [(Int, Element)] {
precondition(r <= self.count)
return Array<(Int, Element)>(zip(0..<self.count, self).sorted{ $0.1 > $1.1 }.prefix(through: r - 1))
extension Array {
func strideArray(inCount: Int = 20) -> Array<Element> {
if count < inCount {
return self
} else {
let stride = count / inCount
var newArray: [Element] = []
for i in 0..<inCount {
newArray.append(self[i * stride])
return newArray
extension String{
func cStr() -> UnsafePointer<Int8>? {
return (self as NSString).utf8String
func cStr() -> UnsafePointer<Int8>? {
return (self as NSString).utf8String
func address<T: AnyObject>(o: T) -> String {
return String.init(format: "%018p", unsafeBitCast(o, to: Int.self))
return String.init(format: "%018p", unsafeBitCast(o, to: Int.self))
......@@ -15,207 +15,210 @@
import Foundation
public protocol SummableMultipliable: Equatable {
static func +(lhs: Self, rhs: Self) -> Self
static func *(lhs: Self, rhs: Self) -> Self
static func -(lhs: Self, rhs: Self) -> Self
static func +(lhs: Self, rhs: Self) -> Self
static func *(lhs: Self, rhs: Self) -> Self
static func -(lhs: Self, rhs: Self) -> Self
public protocol PrecisionType: SummableMultipliable{
init(inFloat: Float32)
init(inFloat16: Float16)
init<P: PrecisionType>(_ inP: P)
static var bitSize: UInt { get }
init(inFloat: Float32)
init(inFloat16: Float16)
init<P: PrecisionType>(_ inP: P)
static var bitSize: UInt { get }
public typealias Float16 = Int16
extension Float16: PrecisionType {
public static func * (prefix: Float16, postfix: Float16) {
return prefix * postfix
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize {
self = Float16(inFloat: inP as! Float32)
} else if P.bitSize == Float16.bitSize {
self = inP as! Float16
} else {
public static var bitSize: UInt {
return 16
public init(inFloat16: Float16) {
self = inFloat16
public init(inFloat: Float32) {
self = Int16(inFloat)
public static func * (prefix: Float16, postfix: Float16) {
return prefix * postfix
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize {
self = Float16(inFloat: inP as! Float32)
} else if P.bitSize == Float16.bitSize {
self = inP as! Float16
} else {
public static var bitSize: UInt {
return 16
public init(inFloat16: Float16) {
self = inFloat16
public init(inFloat: Float32) {
self = Int16(inFloat)
extension Float32: PrecisionType {
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize {
self = inP as! Float32
} else if P.bitSize == Float16.bitSize {
self = Float32.init(inP as! Float16)
} else {
public init(inFloat: Float32) {
self = inFloat
public init(inFloat16: Float16) {
self = Float32.init(inFloat16)
public static var bitSize: UInt {
return 32
public init<P>(_ inP: P) where P : PrecisionType {
if P.bitSize == Float32.bitSize {
self = inP as! Float32
} else if P.bitSize == Float16.bitSize {
self = Float32.init(inP as! Float16)
} else {
public init(inFloat: Float32) {
self = inFloat
public init(inFloat16: Float16) {
self = Float32.init(inFloat16)
public static var bitSize: UInt {
return 32
// N - 0 C - 1 H - 2 W - 3
struct DataLayout {
static func NCHW(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout {
return DataLayout.init([(.N, dim[0]), (.C, dim[1]), (.H, dim[2]), (.W, dim[3])])
static func NHWC(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout {
return DataLayout.init([(.N, dim[0]), (.H, dim[1]), (.W, dim[2]), (.C, dim[3])])
func count() -> Int {
return layoutWithDim.count
var N: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .N {
return layoutDim.1
return nil
set {
var newN = (Layout.N, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .N
}) {
var C: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .C {
return layoutDim.1
return nil
set {
var newN = (Layout.C, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .N
}) {
var H: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .H {
return layoutDim.1
return nil
static func NCHW(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout {
return DataLayout.init([(.N, dim[0]), (.C, dim[1]), (.H, dim[2]), (.W, dim[3])])
static func NHWC(dim: Dim = Dim.init(inDim: [0, 0, 0, 0])) -> DataLayout {
return DataLayout.init([(.N, dim[0]), (.H, dim[1]), (.W, dim[2]), (.C, dim[3])])
func count() -> Int {
return layoutWithDim.count
var N: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .N {
return layoutDim.1
set {
var newN = (Layout.H, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .H
}) {
return nil
set {
var newN = (Layout.N, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .N
}) {
var C: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .C {
return layoutDim.1
var W: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .W {
return layoutDim.1
return nil
return nil
set {
var newN = (Layout.C, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .N
}) {
var H: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .H {
return layoutDim.1
set {
var newN = (Layout.W, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .W
}) {
return nil
set {
var newN = (Layout.H, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .H
}) {
var W: Int? {
get {
for layoutDim in layoutWithDim {
if layoutDim.0 == .W {
return layoutDim.1
init(_ inLayout: [(Layout, Int)]) {
layoutWithDim = inLayout
func layout() -> [Layout] {
return layoutWithDim.map({ (layout: Layout, dim: Int) -> Layout in
return layout
var layoutWithDim: [(Layout, Int)] = [(.N, 0), (.C, 0), (.H, 0), (.W, 0)]
func convertTo(inLayout: [Layout]) {
enum Layout: Int{
case N = 0
case C = 1
case H = 2
case W = 3
static func defaultLayout() -> [Layout] {
return [N, C, H, W]
return nil
set {
var newN = (Layout.W, newValue)
if let index = layoutWithDim.index(where: { (layout: Layout, dim: Int) -> Bool in
return layout == .W
}) {
init(_ inLayout: [(Layout, Int)]) {
layoutWithDim = inLayout
func layout() -> [Layout] {
return layoutWithDim.map({ (layout: Layout, dim: Int) -> Layout in
return layout
var layoutWithDim: [(Layout, Int)] = [(.N, 0), (.C, 0), (.H, 0), (.W, 0)]
func convertTo(inLayout: [Layout]) {
enum Layout: Int{
case N = 0
case C = 1
case H = 2
case W = 3
static func defaultLayout() -> [Layout] {
return [N, C, H, W]
extension DataLayout: Equatable {
public static func == (lhs: DataLayout, rhs: DataLayout) -> Bool {
if lhs.layoutWithDim.count == rhs.layoutWithDim.count {
var result = true
for i in 0..<lhs.layoutWithDim.count {
result = (lhs.layoutWithDim[i] == rhs.layoutWithDim[i])
return result
} else {
return false
public static func == (lhs: DataLayout, rhs: DataLayout) -> Bool {
if lhs.layoutWithDim.count == rhs.layoutWithDim.count {
var result = true
for i in 0..<lhs.layoutWithDim.count {
result = (lhs.layoutWithDim[i].0 == rhs.layoutWithDim[i].0)
if !result {
return result
} else {
return false
protocol Variant: CustomStringConvertible, CustomDebugStringConvertible {
public protocol Variant: CustomStringConvertible, CustomDebugStringConvertible {
extension Tensor: Variant {
......@@ -231,5 +234,5 @@ extension InputTexture: Variant {
extension MTLTexture where Self: Variant {
......@@ -15,130 +15,150 @@
import Foundation
// Value bundle handed to the prediction completion handler: output dimensions, output
// values, optional intermediate results and the elapsed time of the run.
// Two versions of the members appear in this excerpt; they differ only in the element
// type of `intermediateResults` (`Texture<P>` first, `Variant` second).
public class ResultHolder<P: PrecisionType> {
public let dim: [Int]
public let resultArr: [P]
public var intermediateResults: [Texture<P>]?
public let elapsedTime: Double
// Stores the four arguments unchanged; `inIntermediateResults` defaults to nil.
public init(inDim: [Int], inResult: [P], inElapsedTime: Double, inIntermediateResults: [Texture<P>]? = nil) {
dim = inDim
resultArr = inResult
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults
public let dim: [Int]
public let resultArr: [P]
// Widened to `Variant` so that values other than `Texture<P>` can be carried.
public var intermediateResults: [Variant]?
public let elapsedTime: Double
// Stores the four arguments unchanged; `inIntermediateResults` defaults to nil.
public init(inDim: [Int], inResult: [P], inElapsedTime: Double, inIntermediateResults: [Variant]? = nil) {
dim = inDim
resultArr = inResult
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults
// Textual descriptions for ResultHolder. The same two properties appear twice in this excerpt.
extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
// Builds "Dim: <dim> \n value:[ ... ]". Fewer than 20 values are all printed;
// otherwise values are sampled with a step of `resultArr.count/20` (integer division,
// which is at least 1 because this branch is only taken when the count is 20 or more).
public var debugDescription: String {
var str = ""
str += "Dim: \(dim) \n value:[ "
if resultArr.count < 20 {
for d in resultArr {
str += " \(d) "
} else {
for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
str += " \(resultArr[d]) "
str += " ]"
return str
// Same text as `debugDescription`.
public var description: String {
return debugDescription
// Builds "Dim: <dim> \n value:[ ... ]", printing every value when there are fewer
// than 20 and a strided sample (step `resultArr.count/20`) otherwise.
public var debugDescription: String {
var str = ""
str += "Dim: \(dim) \n value:[ "
if resultArr.count < 20 {
for d in resultArr {
str += " \(d) "
} else {
for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
str += " \(resultArr[d]) "
str += " ]"
return str
// Same text as `debugDescription`.
public var description: String {
return debugDescription
// Holds the operator list, the program, the Metal device and the command queue used by `predict`.
// Two versions of the stored properties and initializer appear in this excerpt.
public class Executor<P: PrecisionType> {
var ops: [Runable & InferShaperable] = []
let program: Program
let device: MTLDevice
let queue: MTLCommandQueue
// First version: stores its arguments, then creates an operator through
// `OpCreator<P>.shared` for every op description of every block; creation errors are rethrown.
public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws {
program = inProgram
device = inDevice
queue = inQueue
for block in inProgram.programDesc.blocks {
for i in 0..<block.ops.count {
let op = block.ops[i]
do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
} catch let error {
throw error
var ops: [Runable & InferShaperable] = []
let program: Program
let device: MTLDevice
let queue: MTLCommandQueue
// Second version: same setup, but with a fixed number of ops per block (see note below).
public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws {
program = inProgram
device = inDevice
queue = inQueue
for block in inProgram.programDesc.blocks {
// NOTE(review): the bound is hard-coded to 39, whereas the version above iterates
// 0..<block.ops.count. `block.ops[i]` traps when a block has fewer than 39 ops, and
// ops from index 39 onwards are never created. Looks like a debugging leftover; confirm.
for i in 0..<39 {
let op = block.ops[i]
do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
// op.inferShape()
} catch let error {
throw error
// Runs the operators on `input` and reports the outcome to `completionHandle`.
// Two versions of this method are interleaved in this excerpt: one with an extra
// `except` parameter (default 0) and one without it.
// Both start the same way: make a command buffer (throwing predictError when that fails),
// then optionally run `preProcessKernle` on the input and use its output texture instead.
public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws {
guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
let resInput: MTLTexture
if let inPre = preProcessKernle {
do {
try inPre.compute(inputTexuture: input, commandBuffer: buffer)
resInput = inPre.outputTexture
} catch let error {
throw error
} else {
resInput = input
// Version without `except`.
public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil) throws {
guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
let resInput: MTLTexture
if let inPre = preProcessKernle {
do {
try inPre.compute(inputTexuture: input, commandBuffer: buffer)
resInput = inPre.outputTexture
} catch let error {
throw error
} else {
resInput = input
// Timing starts here, before the ops are run on the buffer.
let beforeDate = Date.init()
// Wrap the (possibly pre-processed) texture with the expected dims and register it as the scope input.
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: expect))
program.scope.setInput(input: inputTexture)
// Run every op on the same command buffer; the first error aborts the prediction.
for op in ops {
do {
try op.run(device: device, buffer: buffer)
} catch let error {
throw error
buffer.addCompletedHandler { (commandbuffer) in
// let inputArr = resInput.floatArray(res: { (p:P) -> P in
// return p
// })
// print(inputArr)
// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray()
// print(stridableInput)
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
// for op in self.ops {
// op.delogOutput()
// }
// return
// self.ops[2].delogOutput()
let afterDate = Date.init()
// This version terminates with fatalError when the scope has no output or it is not a Texture<P>.
guard let outputVar = self.program.scope.output() else {
fatalError("output nil")
guard let output = outputVar as? Texture<P> else {
fatalError("output var type error")
let resultHodlder = ResultHolder<P>.init(inDim: output.dim.dims, inResult: output.metalTexture.floatArray(res: { (p:P) -> P in
return p
}), inElapsedTime: afterDate.timeIntervalSince(beforeDate))
// Continuation of the version that takes `except`.
let beforeDate = Date.init()
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: expect))
program.scope.setInput(input: inputTexture)
//(ops.count - except)
for i in 0..<ops.count {
let op = ops[i]
do {
try op.run(device: device, buffer: buffer)
} catch let error {
throw error
public func clear() {
// When `except` is positive, the inputs of the op at index `ops.count - except` are
// captured so they can be returned as intermediate results.
// NOTE(review): this subscript traps if `except` is greater than `ops.count`; confirm callers never pass that.
var outputTextures: [Variant]?
if except > 0 {
outputTextures = ops[ops.count - except].inputs()
buffer.addCompletedHandler { (commandbuffer) in
// return;
// let inputArr = resInput.floatArray(res: { (p:P) -> P in
// return p
// })
// writeToLibrary(fileName: "input_hand", array: inputArr)
// print("write to library done")
// return
// print(inputArr)
// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray()
// print(stridableInput)
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
for op in self.ops {
// op.delogOutput()
// return
// self.ops[91].delogOutput()
// self.ops[92].delogOutput()
// self.ops[93].delogOutput()
let afterDate = Date.init()
var resultHolder: ResultHolder<P>
// With `except > 0` the holder carries empty dim/result arrays plus the captured inputs;
// otherwise it carries the scope output's dims and float values.
if except > 0 {
resultHolder = ResultHolder<P>.init(inDim: [], inResult: [], inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures)
} else {
// NOTE(review): the force unwrap and force cast below crash without a message, whereas
// the other version uses guard + fatalError with explicit messages for the same two cases.
let outputVar: Variant = self.program.scope.output()!
let output: Texture<P> = outputVar as! Texture<P>
resultHolder = ResultHolder<P>.init(inDim: output.dim.dims, inResult: output.metalTexture.floatArray(res: { (p:P) -> P in
return p
}), inElapsedTime: afterDate.timeIntervalSince(beforeDate))
public func clear() {
//public let paddle_executor: Executor = Executor.init()
......@@ -16,100 +16,101 @@ import Metal
import Foundation
// Static requirements for a fused operator type (the three requirements are listed twice in this excerpt):
// - fusionNode(): a Node; implementers in this file return the head of a chain of op-type nodes,
//   presumably the pattern that gets fused (confirm against the fusion pass).
// - change(): a dictionary of (from, to) string pairs keyed by string; its use is not visible here.
// - fusionType(): a string; implementers in this file return a type-name constant for the fused op.
protocol Fusion {
static func fusionNode() -> Node
static func change() -> [String : [(from: String, to: String)]]
static func fusionType() -> String
static func fusionNode() -> Node
static func change() -> [String : [(from: String, to: String)]]
static func fusionType() -> String
// Something that can be run on a Metal command buffer. Two versions of the requirement
// list appear in this excerpt; the second one adds `inputs()`.
protocol Runable {
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws
func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
func delogOutput()
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws
func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
func delogOutput()
// The operator's input variants; Executor.predict reads this when its `except` argument is positive.
func inputs() -> [Variant]
// Default implementations for operators (the same two methods appear twice in this excerpt).
extension Runable where Self: OperatorProtocol{
// Forwards to `runImpl` and rethrows whatever it throws.
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try runImpl(device: device, buffer: buffer)
} catch let error {
throw error
// print(type + ": " + para.outputDesc())
// Fallback for operators without their own debug dump: prints "<type>: has no implementation".
func delogOutput() {
print(type + ": has no implementation" )
// Forwards to `runImpl` and rethrows whatever it throws.
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try runImpl(device: device, buffer: buffer)
} catch let error {
throw error
// print(type + ": " + para.outputDesc())
// Fallback for operators without their own debug dump: prints "<type>: has no implementation".
func delogOutput() {
print(type + ": has no implementation" )
// Factory requirement for operators: `creat` builds an `OpType` from a device, an op
// description and a scope. (The two members are listed twice in this excerpt.)
protocol Creator where Self: OperatorProtocol{
associatedtype OpType: OperatorProtocol & Runable & InferShaperable
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType
associatedtype OpType: OperatorProtocol & Runable & InferShaperable
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType
// Default `creat`: delegates to `OpType.provide` and rethrows its error.
// (The method appears twice in this excerpt.)
extension Creator where Self: OperatorProtocol {
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType {
do {
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType {
do {
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
// Single requirement `inferShape()`; the implementations visible in this file assign
// `para.output.dim` from the input dims (or leave the body commented out).
protocol InferShaperable {
func inferShape()
func inferShape()
// Common shape of an operator: a parameter type, a kernel type whose ParamType must match it,
// the data copied from the op description, and an initializer taking (device, opDesc, scope).
// (The member list appears twice in this excerpt.)
protocol OperatorProtocol {
associatedtype ParamType
associatedtype KerType: Computable where Self.KerType.ParamType == ParamType
var type: String { get }
var scope: Scope { get }
var inputs: [String : [String]] { get }
var paraInputs: [String : [String]] { get set }
var outpus: [String : [String]] { get }
var attrs: [String : Attr] { get }
var para: ParamType { get }
var kernel: KerType { get }
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws
associatedtype ParamType
associatedtype KerType: Computable where Self.KerType.ParamType == ParamType
var type: String { get }
var scope: Scope { get }
var inputs: [String : [String]] { get }
var paraInputs: [String : [String]] { get set }
var outpus: [String : [String]] { get }
var attrs: [String : Attr] { get }
var para: ParamType { get }
var kernel: KerType { get }
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws
// `provide` is a thin factory: it calls the required initializer on `Self` and rethrows
// any error. (The method appears twice in this excerpt.)
extension OperatorProtocol {
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self {
do {
return try Self.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self {
do {
return try Self.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
// Generic base class for operators: binds a kernel type to its parameter type and stores
// the data taken from the op description. (The members appear twice in this excerpt.)
class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
typealias ParamType = ParameterType
typealias KerType = KernelType
let type: String
let inputs: [String : [String]]
var paraInputs: [String : [String]]
let outpus: [String : [String]]
let attrs: [String : Attr]
let para: ParamType
let scope: Scope
var kernel: KerType
// Copies type, inputs, outputs, attrs and paraInputs from `opDesc`, builds the parameter
// object (rethrowing its error), then creates the kernel from the device and that parameter.
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
type = opDesc.type
scope = inScope
inputs = opDesc.inputs
outpus = opDesc.outputs
attrs = opDesc.attrs
paraInputs = opDesc.paraInputs
do {
para = try ParamType.init(opDesc:opDesc, inScope: inScope)
} catch let error {
throw error
kernel = KernelType.init(device: device, param: para)
typealias ParamType = ParameterType
typealias KerType = KernelType
let type: String
let inputs: [String : [String]]
var paraInputs: [String : [String]]
let outpus: [String : [String]]
let attrs: [String : Attr]
let para: ParamType
let scope: Scope
var kernel: KerType
// Same initializer as above: copy the description fields, build the parameter, then the kernel.
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
type = opDesc.type
scope = inScope
inputs = opDesc.inputs
outpus = opDesc.outputs
attrs = opDesc.attrs
paraInputs = opDesc.paraInputs
do {
para = try ParamType.init(opDesc:opDesc, inScope: inScope)
} catch let error {
throw error
kernel = KernelType.init(device: device, param: para)
// op infos
......@@ -15,45 +15,50 @@
import Foundation
// Parameters of the batch-norm operator, resolved from an op description and a scope.
// (The initializer and the property list each appear twice in this excerpt.)
class BatchNormParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
// Input X and output Y come from opDesc.inputs / opDesc.outputs; bias, mean, scale and
// variance come from opDesc.paraInputs; "epsilon", "momentum" and "is_test" are read from
// opDesc.attrs. Any lookup failure is rethrown.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope)
inputBias = try BatchNormParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
inputMean = try BatchNormParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
inputScale = try BatchNormParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
inputVariance = try BatchNormParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs)
is_test = try BatchNormParam.getAttr(key: "is_test", attrs: opDesc.attrs)
} catch let error {
throw error
typealias ParamPrecisionType = P
// Same lookups as above.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope)
inputBias = try BatchNormParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
inputMean = try BatchNormParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
inputScale = try BatchNormParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
inputVariance = try BatchNormParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs)
is_test = try BatchNormParam.getAttr(key: "is_test", attrs: opDesc.attrs)
} catch let error {
throw error
// Input and output are textures; bias, mean, scale and variance are tensors.
let input: Texture<P>
var output: Texture<P>
let inputBias: Tensor<ParamPrecisionType>
let inputMean: Tensor<ParamPrecisionType>
let inputScale: Tensor<ParamPrecisionType>
let inputVariance: Tensor<ParamPrecisionType>
let epsilon: Float
let momentum: Float
let is_test: Bool
let input: Texture<P>
var output: Texture<P>
let inputBias: Tensor<ParamPrecisionType>
let inputMean: Tensor<ParamPrecisionType>
let inputScale: Tensor<ParamPrecisionType>
let inputVariance: Tensor<ParamPrecisionType>
let epsilon: Float
let momentum: Float
let is_test: Bool
// Batch-norm operator. Members appear in two interleaved versions in this excerpt;
// `inputs()` is present in only one of them.
class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam<P>>, Runable, Creator, InferShaperable{
// The output takes the same dims as the input.
func inferShape() {
para.output.dim = para.input.dim
typealias OpType = BatchNormOp<P>
// Runs the kernel with this op's parameters on the given command buffer; errors are rethrown.
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
// Returns the input texture followed by the bias, mean, scale and variance tensors.
func inputs() -> [Variant] {
return [para.input, para.inputBias, para.inputMean, para.inputScale, para.inputVariance]
// The output takes the same dims as the input.
func inferShape() {
para.output.dim = para.input.dim
typealias OpType = BatchNormOp<P>
// Runs the kernel with this op's parameters on the given command buffer; errors are rethrown.
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
class BoxcoderParam<P: PrecisionType>: OpParam {
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
// Parameters of the box-coder operator. (The initializer body appears twice in this excerpt.)
class BoxcoderParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
// Resolves "PriorBox", "PriorBoxVar" and "TargetBox" from opDesc.inputs and "OutputBox" from
// opDesc.outputs, and reads the "code_type" and "box_normalized" attributes; failures are rethrown.
// The trailing checks use `assert`, which Swift evaluates only in debug (non -O) builds.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope)
priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope)
targetBox = try BoxcoderParam.getFirstTensor(key: "TargetBox", map: opDesc.inputs, from: inScope)
output = try BoxcoderParam.getFirstTensor(key: "OutputBox", map: opDesc.outputs, from: inScope)
codeType = try BoxcoderParam.getAttr(key: "code_type", attrs: opDesc.attrs)
boxNormalized = try BoxcoderParam.getAttr(key: "box_normalized", attrs: opDesc.attrs)
} catch let error {
throw error
// All three inputs must have the identity transpose [0, 1, 2, 3].
assert(priorBox.transpose == [0, 1, 2, 3])
assert(priorBoxVar.transpose == [0, 1, 2, 3])
assert(targetBox.transpose == [0, 1, 2, 3])
assert(codeType == "decode_center_size") // encode_center_size is not implemented
assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1)
do {
priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope)
priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope)
targetBox = try BoxcoderParam.getFirstTensor(key: "TargetBox", map: opDesc.inputs, from: inScope)
output = try BoxcoderParam.getFirstTensor(key: "OutputBox", map: opDesc.outputs, from: inScope)
codeType = try BoxcoderParam.getAttr(key: "code_type", attrs: opDesc.attrs)
boxNormalized = try BoxcoderParam.getAttr(key: "box_normalized", attrs: opDesc.attrs)
} catch let error {
throw error
// All three inputs must have the identity transpose [0, 1, 2, 3].
assert(priorBox.transpose == [0, 1, 2, 3])
assert(priorBoxVar.transpose == [0, 1, 2, 3])
assert(targetBox.transpose == [0, 1, 2, 3])
assert(codeType == "decode_center_size") // encode_center_size is not implemented
assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1)
let priorBox: Texture<P>
let priorBoxVar: Texture<P>
......@@ -39,23 +39,42 @@ class BoxcoderParam<P: PrecisionType>: OpParam {
var output: Texture<P>
let codeType: String
let boxNormalized: Bool
// Box-coder operator. The class header and several members appear twice in this excerpt.
class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P>>, Runable, Creator, InferShaperable{
class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P>>, Runable, Creator, InferShaperable{
// Returns the prior box, prior box variance and target box textures.
func inputs() -> [Variant] {
return [para.priorBox, para.priorBoxVar, para.targetBox]
// Only commented-out code is visible in this body.
func inferShape() {
// para.output.dim = para.input.dim
// para.output.dim = para.input.dim
typealias OpType = BoxcoderOp<P>
// Runs the kernel with this op's parameters on the given command buffer; errors are rethrown.
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
// Debug hook: reads the output texture as Float32 values and prints " write done ".
// NOTE(review): the call that would write the array is commented out, so the message
// is printed even though nothing is written.
func delogOutput() {
let outputArray = para.output.metalTexture.floatArray { (o: Float32) -> Float32 in
return o
// writeToLibrary(fileName: "boxcoder_output", array: outputArray)
print(" write done ")
......@@ -15,44 +15,67 @@
import Foundation
// Parameters of the concat operator: a list of input textures, one output texture and an axis.
// (The initializer and the property list each appear twice in this excerpt.)
class ConcatParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
// Looks up the variable names listed under "X" in opDesc.inputs, requires each one to resolve
// in the scope to a Texture<P>, then reads the "axis" attribute and the output.
// The bodies of the two guard statements are not visible in this excerpt.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
guard let xlist = opDesc.inputs["X"] else {
for x in xlist {
guard let variant = inScope[x], let v = variant as? Texture<P> else {
axis = try ConcatParam.getAttr(key: "axis", attrs: opDesc.attrs)
output = try ConcatParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
typealias ParamPrecisionType = P
// Same lookups as above.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
guard let xlist = opDesc.inputs["X"] else {
for x in xlist {
guard let variant = inScope[x], let v = variant as? Texture<P> else {
axis = try ConcatParam.getAttr(key: "axis", attrs: opDesc.attrs)
output = try ConcatParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
var input: [Texture<P>] = []
var output: Texture<P>
let axis: Int
var input: [Texture<P>] = []
var output: Texture<P>
let axis: Int
// Concat operator. Members appear in two interleaved versions in this excerpt.
class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Runable, Creator, InferShaperable{
// Only commented-out code is visible in this body.
func inferShape() {
// let dim = para.input.reduce([0, 0]) {[$0[0] + $1.dim[0], $1.dim[1]]}
// para.output.dim = Dim.init(inDim: dim)
// Returns the list of input textures.
func inputs() -> [Variant] {
return para.input
func inferShape() {
// let dim = para.input.reduce([0, 0]) {[$0[0] + $1.dim[0], $1.dim[1]]}
// para.output.dim = Dim.init(inDim: dim)
typealias OpType = ConcatOp<P>
// Runs the kernel with this op's parameters on the given command buffer; errors are rethrown.
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
typealias OpType = ConcatOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
// Debug hook: reads the output texture as Float32 values; every use of the result is commented out.
func delogOutput() {
let outputArray = para.output.metalTexture.floatArray { (o: Float32) -> Float32 in
return o
// NOTE(review): `device` is force-unwrapped here but referenced only by the commented-out
// line below, so this can crash on a machine without a default Metal device for no benefit.
let device: MTLDevice = MTLCreateSystemDefaultDevice()!
// let tensorArray: [P] = device.texture2tensor(texture: para.output.metalTexture, dim: [1917, 4])
// print(tensorArray.strideArray())
// print(para.output.metalTexture)
// writeToLibrary(fileName: "concat_out", array: outputArray)
// print(" write done ")
// print(outputArray.strideArray())
......@@ -16,120 +16,125 @@ import Foundation
// Parameters of the fused conv + add + batch-norm + relu operator.
// (The initializer and the property list each appear twice in this excerpt.)
class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
// Filter, variance, bias, scale, mean and y are resolved from opDesc.paraInputs; input and
// output from opDesc.inputs / opDesc.outputs; "strides", "paddings", "dilations", "epsilon"
// and "groups" are read from opDesc.attrs. Any lookup failure is rethrown.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
typealias ParamPrecisionType = P
// Same lookups as above.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
let input: Texture<P>
let variance: Tensor<ParamPrecisionType>
let bias: Tensor<ParamPrecisionType>
let mean: Tensor<ParamPrecisionType>
let scale: Tensor<ParamPrecisionType>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let epsilon: Float32
// Optional Metal buffers that the initializer shown here does not assign;
// presumably filled in by the kernel (confirm).
var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
let input: Texture<P>
let variance: Tensor<ParamPrecisionType>
let bias: Tensor<ParamPrecisionType>
let mean: Tensor<ParamPrecisionType>
let scale: Tensor<ParamPrecisionType>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let epsilon: Float32
var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
// Fused conv + elementwise-add + batch-norm + relu operator.
// Two versions of the members are interleaved line by line in this excerpt, so a statement
// is not necessarily part of the function header printed directly above it.
class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKernel<P>, ConvAddBatchNormReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddBatchNormReluOp<P>
// Returns the variance, bias, mean, scale, y and filter tensors followed by the input texture.
func inputs() -> [Variant] {
return [para.variance, para.bias, para.mean, para.scale, para.y, para.filter, para.input]
typealias OpType = ConvAddBatchNormReluOp<P>
// Derives the output dims from the input dims, filter dims, strides, paddings and dilations.
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
// The output dims start with the first input dim. For each stride entry i, the sizes at
// index i + 1 of the input and filter dims are combined as
//   dKernel    = dilation * (filterSize - 1) + 1
//   outputSize = (inputSize + 2 * padding - dKernel) / stride + 1   (integer division)
// The statement that adds outputSize to outDim is not visible in this excerpt.
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
para.output.dim = Dim.init(inDim: outDim)
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
// Runs the kernel with this op's parameters on the given command buffer; errors are rethrown.
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
para.output.dim = Dim.init(inDim: outDim)
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
// Builds the node chain conv --> elementwise add --> batch norm --> relu and returns its head.
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType)
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
// This op supplies an empty table.
static func change() -> [String : [(from: String, to: String)]] {
return [:]
static func fusionType() -> String {
return gConvAddBatchNormReluType
func delogOutput() {
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType)
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
static func change() -> [String : [(from: String, to: String)]] {
return [:]
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
static func fusionType() -> String {
return gConvAddBatchNormReluType
// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
// Debug hook: reads the output texture as an array of P, passes it to writeToLibrary and prints " write done".
// NOTE(review): the file name "output_112x112x32_2" is hard-coded, so each call hands
// writeToLibrary the same name regardless of which op instance is being dumped.
func delogOutput() {
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p
writeToLibrary(fileName: "output_112x112x32_2", array: output)
print(" write done")
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p
writeToLibrary(fileName: "output_112x112x32_2", array: output)
print(" write done")
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
......@@ -15,80 +15,108 @@
import Foundation
// Parameters of the fused conv + add operator.
// (The initializer and the property list each appear twice in this excerpt.)
class ConvAddParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
// Filter and y are resolved from opDesc.paraInputs; input and output from opDesc.inputs /
// opDesc.outputs; "strides", "paddings", "dilations" and "groups" are read from opDesc.attrs.
// Any lookup failure is rethrown.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs)
y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
typealias ParamPrecisionType = P
// Same lookups as above.
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs)
y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
let input: Texture<P>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
let input: Texture<P>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
// Fused conv + elementwise-add operator.
// Two versions of the members are interleaved line by line in this excerpt, so a statement
// is not necessarily part of the function header printed directly above it.
class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, Runable, Creator, InferShaperable, Fusion{
// Builds the node chain conv --> elementwise add and returns its head.
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType)
return beginNode
// Debug hook: prints " conv add: " and reads the input and output textures as arrays of P;
// every statement that would print those arrays is commented out.
func delogOutput() {
print(" conv add: ")
// print(para.input.metalTexture)
// This op supplies an empty table.
static func change() -> [String : [(from: String, to: String)]] {
return [:]
// print(" filter array: ")
// let filterArray: [P] = para.filter.buffer.array()
// print(filterArray)
let input = para.input.metalTexture.floatArray { (p: P) -> P in
return p
// print(input)
static func fusionType() -> String {
return gConvAddType
let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p
// print(para.output.metalTexture)
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType)
return beginNode
static func change() -> [String : [(from: String, to: String)]] {
return [:]
// Returns the input texture followed by the y and filter tensors.
func inputs() -> [Variant] {
return [para.input, para.y, para.filter]
static func fusionType() -> String {
return gConvAddType
typealias OpType = ConvAddOp<P>
// Derives the output dims from the input dims, filter dims, strides, paddings and dilations.
func inferShape() {
typealias OpType = ConvAddOp<P>
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
// The output dims start with the first input dim. For each stride entry i, the sizes at
// index i + 1 of the input and filter dims are combined as
//   dKernel    = dilation * (filterSize - 1) + 1
//   outputSize = (inputSize + 2 * padding - dKernel) / stride + 1   (integer division)
// The statement that adds outputSize to outDim is not visible in this excerpt.
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
para.output.dim = Dim.init(inDim: outDim)
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
// Runs the kernel with this op's parameters on the given command buffer; errors are rethrown.
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
para.output.dim = Dim.init(inDim: outDim)
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
......@@ -15,115 +15,164 @@
import Foundation
class ConvBNReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvBNReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvBNReluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvBNReluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvBNReluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
epsilon = try ConvBNReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
groups = try ConvBNReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvBNReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
bias = try ConvBNReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvBNReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvBNReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvBNReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvBNReluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvBNReluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvBNReluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
epsilon = try ConvBNReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
groups = try ConvBNReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvBNReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
bias = try ConvBNReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvBNReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvBNReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
let input: Texture<P>
let variance: Tensor<ParamPrecisionType>
let bias: Tensor<ParamPrecisionType>
let mean: Tensor<ParamPrecisionType>
let scale: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let epsilon: Float32
var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
let input: Texture<P>
let variance: Tensor<ParamPrecisionType>
let bias: Tensor<ParamPrecisionType>
let mean: Tensor<ParamPrecisionType>
let scale: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let epsilon: Float32
var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
class ConvBNReluOp<P: PrecisionType>: Operator<ConvBNReluKernel<P>, ConvBNReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvBNReluOp<P>
typealias OpType = ConvBNReluOp<P>
func inputs() -> [Variant] {
return [para.input, para.variance, para.bias, para.mean, para.scale, para.filter]
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
para.output.dim = Dim.init(inDim: outDim)
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
para.output.dim = Dim.init(inDim: outDim)
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
static func change() -> [String : [(from: String, to: String)]] {
return [:]
static func fusionType() -> String {
return gConvBnReluType
func delogOutput() {
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
static func change() -> [String : [(from: String, to: String)]] {
return [:]
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
static func fusionType() -> String {
return gConvBnReluType
func delogOutput() {
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p
writeToLibrary(fileName: "output_112x112x32_2", array: output)
print(" write done")
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
// print("input: ")
// print(para.input.metalTexture)
// let input = para.input.metalTexture.floatArray { (p: P) -> P in
// return p
// }
// for i in 0..<input.count {
// print(" index \(i) : \(input[i])")
// }
// print(input)
// writeToLibrary(fileName: "input35", array: input)
// print(input)
// let newScale = para.newScale?.contents().bindMemory(to: P.self, capacity: para.newScale!.length)
// let newBiase = para.newBiase?.contents().bindMemory(to: P.self, capacity: para.newBiase!.length)
// let filterArray: [Float32] = para.filter.buffer.array();
//// writeToLibrary(fileName: "filter35", array: filterArray)
// print(filterArray)
// print("new scale: ")
// for i in 0..<(para.newScale!.length / MemoryLayout<P>.size) {
// print("index: \(i) \(newScale![i]) ")
// }
// print("new biase: ")
// for i in 0..<(para.newBiase!.length / MemoryLayout<P>.size) {
// print("index: \(i) \(newBiase![i]) ")
// }
let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p
writeToLibrary(fileName: "batch_norm_34.tmp_2", array: output)
print(" write done")
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: true)
......@@ -15,74 +15,79 @@
import Foundation
class ConvParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvParam.output(outputs: opDesc.outputs, from: inScope)
stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs)
} catch let error {
throw error
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvParam.output(outputs: opDesc.outputs, from: inScope)
stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs)
} catch let error {
throw error
let input: Texture<P>
let filter: Tensor<ParamPrecisionType>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
let input: Texture<P>
let filter: Tensor<ParamPrecisionType>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
class ConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
para.output.dim = Dim.init(inDim: outDim)
func inputs() -> [Variant] {
return [para.input, para.filter]
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
typealias OpType = ConvOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func delogOutput() {
print("conv output : ")
// let _: Float16? = para.output.metalTexture.logDesc()
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
para.output.dim = Dim.init(inDim: outDim)
typealias OpType = ConvOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func delogOutput() {
print("conv output : ")
// let _: Float16? = para.output.metalTexture.logDesc()
......@@ -28,6 +28,10 @@ class ConvTransposeParam<P: PrecisionType>: ConvParam<P> {
class ConvTransposeOp<P: PrecisionType>: Operator<ConvTransposeKernel<P>, ConvTransposeParam<P>>, Runable, Creator, InferShaperable{
func inputs() -> [Variant] {
return [para.input, para.filter]
func inferShape() {
// para.output.dim = para.input.dim
......@@ -15,49 +15,54 @@
import Foundation
class DepthConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
func inputs() -> [Variant] {
return [para.input, para.filter]
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
para.output.dim = Dim.init(inDim: outDim)
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
typealias OpType = DepthConvOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func delogOutput() {
print("conv output : ")
// let _: Float16? = para.output.metalTexture.logDesc()
para.output.dim = Dim.init(inDim: outDim)
typealias OpType = DepthConvOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func delogOutput() {
print("conv output : ")
// let _: Float16? = para.output.metalTexture.logDesc()
......@@ -15,75 +15,79 @@
import Foundation
class DwConvBNReluOp<P: PrecisionType>: Operator<ConvBNReluKernel<P>, ConvBNReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvBNReluOp<P>
typealias OpType = ConvBNReluOp<P>
func inputs() -> [Variant] {
return [para.input, para.bias, para.mean, para.filter, para.variance, para.scale]
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
para.output.dim = Dim.init(inDim: outDim)
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
para.output.dim = Dim.init(inDim: outDim)
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gDepthConvType)
_ = beginNode
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
static func change() -> [String : [(from: String, to: String)]] {
return [:]
static func fusionType() -> String {
return gDwConvBnReluType
func delogOutput() {
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gDepthConvType)
_ = beginNode
--> Node.init(inType: gBatchNormType)
--> Node.init(inType: gReluType)
return beginNode
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
static func change() -> [String : [(from: String, to: String)]] {
return [:]
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
static func fusionType() -> String {
return gDwConvBnReluType
// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
func delogOutput() {
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
// let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p
writeToLibrary(fileName: "output_112x112x32_2", array: output)
print(" write done")
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
// let output = para.output.metalTexture.floatArray { (p: P) -> P in
// return p
// }
// writeToLibrary(fileName: "batch_norm_19.tmp_2", array: output)
// print(" write done")
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
......@@ -15,33 +15,37 @@
import Foundation
class ElementwiseAddParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error {
throw error
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error {
throw error
let input: Texture<P>
let inputY: Tensor<P>
var output: Texture<P>
let axis: Int
let input: Texture<P>
let inputY: Tensor<P>
var output: Texture<P>
let axis: Int
class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, ElementwiseAddParam<P>>, Runable, Creator, InferShaperable{
func inferShape() {
para.output.dim = para.input.dim
typealias OpType = ElementwiseAddOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
func inputs() -> [Variant] {
return [para.input, para.inputY]
func inferShape() {
para.output.dim = para.input.dim
typealias OpType = ElementwiseAddOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
......@@ -15,54 +15,58 @@
import Foundation
class FeedParam<P: PrecisionType>: OpParam{
var output: Texture<P>
var input: InputTexture {
return scope.input() as! InputTexture
var output: Texture<P>
var input: InputTexture {
return scope.input() as! InputTexture
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
typealias ParamPrecisionType = P
typealias ParamPrecisionType = P
class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = FeedOp<P>
func inferShape() {
// print("feed input: \(para.input.expectDim)")
print("feed output: \(para.output.dim)")
// para.output.dim =
// para.output.dim = para.input.expectDim
typealias OpType = FeedOp<P>
func inputs() -> [Variant] {
return [para.input]
func inferShape() {
// print("feed input: \(para.input.expectDim)")
print("feed output: \(para.output.dim)")
// para.output.dim =
// para.output.dim = para.input.expectDim
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
// let resizeKernel = ResizeKernel<P>.init(device: device)
// let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim)
// do {
// try resizeKernel.compute(commandBuffer: buffer, param: resizeParam)
// } catch let error {
// throw error
// }
func delogOutput() {
// para.input.mtlTexture.logDesc()
// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true)
// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false)
// let resizeKernel = ResizeKernel<P>.init(device: device)
// let resizeParam = ResizeParam.init(input: para.input.mtlTexture, output: para.output.metalTexture, expectDim: para.input.expectDim)
// do {
// try resizeKernel.compute(commandBuffer: buffer, param: resizeParam)
// } catch let error {
// throw error
// }
func delogOutput() {
// para.input.mtlTexture.logDesc()
// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true)
// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false)
......@@ -15,40 +15,44 @@
import Foundation
class FetchParam<P: PrecisionType>: OpParam{
var output: Texture<P>
let input: Texture<P>
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = input
} catch let error {
throw error
var output: Texture<P>
let input: Texture<P>
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = input
} catch let error {
throw error
typealias ParamPrecisionType = P
typealias ParamPrecisionType = P
class FetchKernel<P: PrecisionType>: Kernel, Computable {
func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
required init(device: MTLDevice, param: FetchParam<P>) {
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
required init(device: MTLDevice, param: FetchParam<P>) {
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable{
func inferShape() {
typealias OpType = FetchOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
scope.setOutput(output: para.output)
func inputs() -> [Variant] {
return [para.input]
func inferShape() {
typealias OpType = FetchOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
scope.setOutput(output: para.output)
......@@ -50,7 +50,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
param.output.initTexture(device: device, transpose: [0, 2, 3, 1])
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
......@@ -25,7 +25,7 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
super.init(device: device, inFunctionName: "conv_add_3x3")
param.output.initTexture(device: device, transpose: [0, 3, 1, 2])
param.output.initTexture(device: device, inTranspose: [0, 3, 2, 1])
let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
......@@ -59,7 +59,7 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
} else {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3")
param.output.initTexture(device: device, transpose: [0, 2, 3, 1])
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
param.variance.initBuffer(device: device)
......@@ -70,8 +70,13 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
print("offset x: \(offsetX)")
print("offset y: \(offsetY)")
print(" param filter width: \(param.filter.width)")
print(" param filter height: \(param.filter.height)")
print(" param paddings: \(param.paddings)")
print("ConvBNReluKernel offset x: \(offsetX)")
print("ConvBNReluKernel offset y: \(offsetY)")
let offsetZ = 0.0
......@@ -116,8 +121,8 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.newScale!, offset: 0, index: 3)
encoder.setBuffer(param.newBiase!, offset: 0, index: 4)
encoder.setBuffer(param.newScale!, offset: 0, index: 2)
encoder.setBuffer(param.newBiase!, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
......@@ -132,9 +137,8 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
var inMetalParam = param.metalParam
encoder.setBytes(&inMetalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filterBuffer, offset: 0, index: 1)
encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2)
encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3)
encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4)
encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 2)
encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
......@@ -15,88 +15,99 @@
import Foundation
struct PriorBoxMetalParam {
let offset: Float32
let stepWidth: Float32
let stepHeight: Float32
let minSize: Float32
let maxSize: Float32
let imageWidth: Float32
let imageHeight: Float32
let clip: Bool
let numPriors: uint
let aspecRatiosSize: uint
let minSizeSize: uint
let maxSizeSize: uint
let offset: Float32
let stepWidth: Float32
let stepHeight: Float32
let minSize: Float32
let maxSize: Float32
let imageWidth: Float32
let imageHeight: Float32
let clip: Bool
let numPriors: uint
let aspecRatiosSize: uint
let minSizeSize: uint
let maxSizeSize: uint
class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PriorBoxMetalParam!
required init(device: MTLDevice, param: PriorBoxParam<P>) {
super.init(device: device, inFunctionName: "prior_box")
param.output.initTexture(device: device, transpose: [2, 0, 1, 3])
param.outputVariances.initTexture(device: device, transpose: [2, 0, 1, 3])
let imageWidth = Float32(param.inputImage.originDim[3])
let imageHeight = Float32(param.inputImage.originDim[2])
let featureWidth = param.inputImage.originDim[3]
let featureHeight = param.inputImage.originDim[2]
if param.stepW == 0 || param.stepH == 0 {
param.stepW = Float32(imageWidth) / Float32(featureWidth)
param.stepH = Float32(imageHeight) / Float32(featureHeight)
var metalParam: PriorBoxMetalParam!
required init(device: MTLDevice, param: PriorBoxParam<P>) {
super.init(device: device, inFunctionName: "prior_box")
param.output.initTexture(device: device, inTranspose: [2, 0, 1, 3])
param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3])
let imageWidth = Float32(param.inputImage.originDim[3])
let imageHeight = Float32(param.inputImage.originDim[2])
let featureWidth = param.input.originDim[3]
let featureHeight = param.input.originDim[2]
if param.stepW == 0 || param.stepH == 0 {
param.stepW = Float32(imageWidth) / Float32(featureWidth)
param.stepH = Float32(imageHeight) / Float32(featureHeight)
var outputAspectRatior: [Float32] = []
let epsilon = 1e-6
for ar in param.aspectRatios {
var alreadyExist = false
for outputAr in outputAspectRatior {
if fabs(Double(ar) - Double(outputAr)) < Double(epsilon) {
alreadyExist = true
var outputAspectRatior: [Float32] = []
let epsilon = 1e-6
for ar in param.aspectRatios {
var alreadyExist = false
for outputAr in outputAspectRatior {
if fabs(Double(ar) - Double(outputAr)) < Double(epsilon) {
alreadyExist = true
if !alreadyExist {
if param.flip {
outputAspectRatior.append(1.0 / ar)
param.newAspectRatios = outputAspectRatior
let aspectRatiosSize = uint(outputAspectRatior.count)
let maxSizeSize: uint = uint(param.maxSizes.count)
let minSizeSize: uint = uint(param.minSizes.count)
let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize
let minSize = param.minSizes.last ?? 0.0
let maxSize = param.maxSizes.last ?? 0.0
metalParam = PriorBoxMetalParam.init(offset: param.offset, stepWidth: param.stepW, stepHeight: param.stepH, minSize: minSize, maxSize: maxSize, imageWidth: imageWidth, imageHeight: imageHeight, clip: param.clip, numPriors: numPriors, aspecRatiosSize: aspectRatiosSize, minSizeSize: minSizeSize, maxSizeSize: maxSizeSize)
if !alreadyExist {
if param.flip {
outputAspectRatior.append(1.0 / ar)
func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setTexture(param.outputVariances.metalTexture, index: 2)
encoder.setBytes(&metalParam, length: MemoryLayout<PriorBoxMetalParam>.size, index: 0)
encoder.setBytes(param.aspectRatios, length: MemoryLayout<Float32>.size * param.aspectRatios.count, index: 1)
encoder.setBytes(param.variances, length: MemoryLayout<Float32>.size * param.variances.count, index: 2)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
param.newAspectRatios = outputAspectRatior
let aspectRatiosSize = uint(outputAspectRatior.count)
let maxSizeSize: uint = uint(param.maxSizes.count)
let minSizeSize: uint = uint(param.minSizes.count)
let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize
let minSize = param.minSizes.last ?? 0.0
let maxSize = param.maxSizes.last ?? 0.0
metalParam = PriorBoxMetalParam.init(offset: param.offset, stepWidth: param.stepW, stepHeight: param.stepH, minSize: minSize, maxSize: maxSize, imageWidth: imageWidth, imageHeight: imageHeight, clip: param.clip, numPriors: numPriors, aspecRatiosSize: aspectRatiosSize, minSizeSize: minSizeSize, maxSizeSize: maxSizeSize)
func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
print("metalParam: \(metalParam)")
print(" newAspectRatios ")
print(" clip: \(metalParam.clip)")
print(" metalParam.numPriors: \(metalParam.numPriors)")
print(" aspecRatiosSize: \(metalParam.aspecRatiosSize)")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setTexture(param.outputVariances.metalTexture, index: 2)
encoder.setBytes(&metalParam, length: MemoryLayout<PriorBoxMetalParam>.size, index: 0)
encoder.setBytes(param.newAspectRatios!, length: MemoryLayout<Float32>.size * param.newAspectRatios!.count, index: 1)
encoder.setBytes(param.variances, length: MemoryLayout<Float32>.size * param.variances.count, index: 2)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
......@@ -32,7 +32,7 @@ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: FeedParam<P>) {
param.output.initTexture(device: device, transpose: [0, 2, 3, 1])
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
......@@ -15,80 +15,92 @@
import Foundation
struct TransposeMetalParam {
var iC: Int32 = 0
var oC: Int32 = 0
var i0: Int32
var i1: Int32
var i2: Int32
var i3: Int32
init(_ i0: Int32, _ i1: Int32, _ i2: Int32, _ i3: Int32) {
self.i0 = i0
self.i1 = i1
self.i2 = i2
self.i3 = i3
init(_ axis: [Int]) {
self.init(Int32(axis[0]), Int32(axis[1]), Int32(axis[2]), Int32(axis[3]))
var iC: Int32 = 0
var oC: Int32 = 0
var i0: Int32
var i1: Int32
var i2: Int32
var i3: Int32
init(_ i0: Int32, _ i1: Int32, _ i2: Int32, _ i3: Int32) {
self.i0 = i0
self.i1 = i1
self.i2 = i2
self.i3 = i3
init(_ axis: [Int]) {
self.init(Int32(axis[0]), Int32(axis[1]), Int32(axis[2]), Int32(axis[3]))
struct TransposeTestParam: TestParam {
let inputTexture: MTLTexture
let outputTexture: MTLTexture
let iC: Int
let oC: Int
let axis: [Int]
let inputTexture: MTLTexture
let outputTexture: MTLTexture
let iC: Int
let oC: Int
let axis: [Int]
class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
var invT: [Int] = [0, 1, 2, 3]
for (i, v) in param.input.transpose.enumerated() {
invT[v] = i
var axis: [Int] = [0, 1, 2, 3]
for i in 0..<param.axis.count {
axis[4-param.axis.count+i] = 4 - param.axis.count + Int(param.axis[i])
let realAxis = axis.map {invT[$0]}
var tmp = TransposeMetalParam.init(realAxis)
tmp.iC = Int32(param.input.dim[param.input.transpose[3]])
tmp.oC = Int32(param.output.dim[3])
if realAxis == [0, 1, 2, 3] {
print("====> transpose! FAST :)")
} else {
print("====> transpose! SLOW :(")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&tmp, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
var metalParam: TransposeMetalParam!
func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
required init(device: MTLDevice, param: TransposeParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3])
super.init(device: device, inFunctionName: "transpose")
required init(device: MTLDevice, param: TransposeParam<P>) {
param.output.initTexture(device: device, transpose: [0, 1, 2, 3])
super.init(device: device, inFunctionName: "transpose")
var invT: [Int] = [0, 1, 2, 3]
for (i, v) in param.input.transpose.enumerated() {
invT[v] = i
required init(device: MTLDevice, testParam: TransposeTestParam) {
super.init(device: device, inFunctionName: "transpose")
var axis: [Int] = [0, 1, 2, 3]
// var doNothing = false
// if param.axis.count == param.input.transpose.count {
// doNothing = param.axis == param.input.transpose.map { Int32($0) }
// }
for i in 0..<param.axis.count {
axis[4-param.axis.count+i] = 4 - param.axis.count + Int(param.axis[i])
let realAxis = axis.map {invT[$0]}
var tmp = TransposeMetalParam.init(realAxis)
tmp.iC = Int32(param.input.dim[param.input.transpose[3]])
tmp.oC = Int32(param.output.dim[3])
if realAxis == [0, 1, 2, 3] {
print("====> transpose! FAST :)")
} else {
print("====> transpose! SLOW :(")
metalParam = tmp
required init(device: MTLDevice, testParam: TransposeTestParam) {
super.init(device: device, inFunctionName: "transpose")
public func test(commandBuffer: MTLCommandBuffer, param: TransposeTestParam) {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
public func test(commandBuffer: MTLCommandBuffer, param: TransposeTestParam) {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
encoder.setTexture(param.inputTexture, index: 0)
encoder.setTexture(param.outputTexture, index: 1)
var tmp = TransposeMetalParam.init(param.axis)
tmp.iC = Int32(param.iC)
tmp.oC = Int32(param.oC)
encoder.setBytes(&tmp, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
encoder.setTexture(param.inputTexture, index: 0)
encoder.setTexture(param.outputTexture, index: 1)
var tmp = TransposeMetalParam.init(param.axis)
tmp.iC = Int32(param.iC)
tmp.oC = Int32(param.oC)
encoder.setBytes(&tmp, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
// common.metal
// paddle-mobile
// Created by liuRuiLong on 2018/8/26.
// Copyright © 2018年 orange. All rights reserved.
#include <metal_stdlib>
using namespace metal;
// Converts a texture position into a 4-component tensor coordinate.
//   C    - divisor used to split the flattened (slice, component) position;
//          the reshape kernel passes the size of the output's last transposed
//          axis (presumably the channel count - confirm against callers).
//   xyzn - input: {x, y, array slice, component index within the texel}.
//   abcd - output: abcd[1] = y, abcd[2] = x, abcd[0]/abcd[3] derived below.
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
// Flatten (slice, component) into one linear position, 4 components per slice.
// NOTE(review): t is unsigned while C is int, so the division below is done
// in unsigned arithmetic; inputs are assumed non-negative.
uint t = xyzn[2] * 4 + xyzn[3];
// Quotient becomes the outermost coordinate, remainder the innermost one.
abcd[0] = t / C;
abcd[3] = t % C;
// Converts a 4-component tensor coordinate into a texture position; this is
// the arithmetic inverse of xyzn2abcd for the same C.
//   C    - multiplier used to combine abcd[0] and abcd[3] into one position.
//   abcd - input tensor coordinate.
//   xyzn - output: {x, y, array slice, component index within the texel}.
inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[2];
xyzn[1] = abcd[1];
// Combine the outermost and innermost coordinates into one linear position.
uint t = abcd[0] * C + abcd[3];
// 4 components are packed per array slice.
xyzn[2] = t / 4;
xyzn[3] = t % 4;
// Flattens a 4-component coordinate into a linear index, with abcd[3] varying
// fastest and abcd[0] slowest.
//   dim  - extent of each axis; dim[0] is not read.
//   abcd - coordinate to flatten.
// Returns ((abcd[0] * dim[1] + abcd[1]) * dim[2] + abcd[2]) * dim[3] + abcd[3].
inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
int32_t r = abcd[0];
r = r * dim[1] + abcd[1];
r = r * dim[2] + abcd[2];
r = r * dim[3] + abcd[3];
return r;
// Expands a linear index back into a 4-component coordinate; inverse of
// abcd2index for the same dim.
//   dim  - extent of each axis; dim[0] is not read, so abcd[0] simply
//          receives whatever quotient remains.
//   ind  - linear index (passed by value; modified locally).
//   abcd - output coordinate.
inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
// Peel off the fastest-varying axis first, then divide it out.
abcd[3] = ind % dim[3]; ind /= dim[3];
abcd[2] = ind % dim[2]; ind /= dim[2];
abcd[1] = ind % dim[1]; ind /= dim[1];
abcd[0] = ind;
// Applies an axis permutation: output axis i takes the value of input axis
// trans[i].
//   trans - permutation table (the parameter shares the function's name and
//           shadows it inside the body).
//   ipos  - input coordinate.
//   opos  - output coordinate; every element is written if trans is a
//           permutation of 0..3.
inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[i] = ipos[trans[i]];
// Applies the inverse of an axis permutation: input axis i is scattered to
// output axis trans[i], undoing what trans() does with the same table.
//   trans - permutation table.
//   ipos  - input coordinate.
//   opos  - output coordinate; fully written only if trans is a permutation
//           of 0..3.
inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[trans[i]] = ipos[i];
......@@ -704,9 +704,8 @@ kernel void conv_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTe
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
......@@ -749,9 +748,8 @@ kernel void conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTe
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
......@@ -803,8 +801,8 @@ kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array<float, access::sa
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
......@@ -60,7 +60,7 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
float4 res;
if (param.clip) {
res = min(max(box, 0.0), 1.0);
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
......@@ -74,7 +74,7 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
......@@ -92,6 +92,7 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(variances_output, gid.xy, gid.z);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// Parameters bound at buffer(0) of the reshape kernel.
struct ReshapeParam {
// Input extents; the kernel's active code path reads idim[2] and
// idim[itrans[3]].
int32_t idim[4];
// Input axis permutation; only itrans[3] is read by the kernel.
int32_t itrans[4];
// Output extents; their product bounds the valid linear indices.
int32_t odim[4];
// Output axis permutation, undone with invtrans() before flattening.
int32_t otrans[4];
// Reshape kernel: for each output texel, computes the linear element index of
// each of its 4 components and fetches the element with that index from the
// input texture.
//   inTexture  - source texture array (texture 0).
//   outTexture - destination texture array (texture 1).
//   rp         - input/output extents and axis permutations (buffer 0).
//   gid        - output texel position: (x, y, array slice).
kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant ReshapeParam &rp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
// Threads outside the output texture have nothing to write.
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
// ixyzn is only used by the commented-out path further down.
int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4];
// Local copy of the parameters; presumably needed so the helper functions
// can take the arrays outside the constant address space - confirm.
ReshapeParam lrp = rp;
int oC = lrp.odim[lrp.otrans[3]];
// NOTE(review): iC is only referenced by the commented-out path below.
int iC = lrp.idim[lrp.itrans[3]];
// Total number of output elements; indices at or beyond it are padding.
int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3];
float4 r;
for (int n = 0; n < 4; n++) {
oxyzn[3] = n;
// oxyzn now addresses one component of the texel: (gid.x, gid.y, gid.z, n).
xyzn2abcd(oC, oxyzn, oabcd);
int tabcd[4];
// Undo the output permutation, then flatten against the output extents.
invtrans(lrp.otrans, oabcd, tabcd);
int index = abcd2index(lrp.odim, tabcd);
if (index < count) {
// Decompose the linear index as
//   ((h * idim[2] + w) * arraySize + slice) * 4 + c
// where arraySize is the input texture's array size.
int c = index % 4;
int temp0 = index % (inTexture.get_array_size() * 4);
int slice = temp0 / 4;
int temp1 = index % (inTexture.get_array_size() * 4 * lrp.idim[2]);
int w = temp1 / (inTexture.get_array_size() * 4);
int h = index / (inTexture.get_array_size() * 4 * lrp.idim[2]);
// Earlier helper-based mapping, left disabled:
// index2abcd(lrp.idim, index, tabcd);
// abcd2xyzn(iC, tabcd, ixyzn);
r[n] = inTexture.read(uint2(w, h), slice)[c];
} else {
// Component lies past the last real element: zero-fill.
r[n] = 0;
// Store the assembled texel at this thread's output position.
outTexture.write(r, gid.xy, gid.z);
//kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
// texture2d_array<half, access::write> outTexture [[texture(1)]],
// uint3 gid [[thread_position_in_grid]]) {
// if (gid.x >= outTexture.get_width() ||
// gid.y >= outTexture.get_height() ||
// gid.z >= outTexture.get_array_size()) return;
// half4 r = inTexture.read(uint2(0, 0), gid.x);
// outTexture.write(r, gid.xy, gid.z);
......@@ -31,7 +31,11 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam {
class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{
func inputs() -> [Variant] {
return [para.scores,para.bboxes]
func inferShape() {
// para.output.dim = para.input.dim
......@@ -15,54 +15,58 @@
import Foundation
class PoolParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs)
ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs)
stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs)
padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs)
ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs)
globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs)
} catch let error {
throw error
// let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self)
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
poolType = try PoolParam.getAttr(key: "pooling_type", attrs: opDesc.attrs)
ksize = try PoolParam.getAttr(key: "ksize", attrs: opDesc.attrs)
stride = try PoolParam.getAttr(key: "strides", attrs: opDesc.attrs)
padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs)
ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs)
globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs)
} catch let error {
throw error
let input: Texture<P>
var output: Texture<P>
var ksize: [Int32]
var stride: [Int32]
var padding: [Int32]
var poolType: String
var ceilMode: Bool
var globalPooling: Bool
// let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self)
let input: Texture<P>
var output: Texture<P>
var ksize: [Int32]
var stride: [Int32]
var padding: [Int32]
var poolType: String
var ceilMode: Bool
var globalPooling: Bool
class PoolOp<P: PrecisionType>: Operator<PoolKernel<P>, PoolParam<P>>, Runable, Creator, InferShaperable{
func inferShape() {
// para.output.dim = para.input.dim
typealias OpType = PoolOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func delogOutput() {
print("pool2d delog")
let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true)
let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true)
func inputs() -> [Variant] {
return [para.input]
func inferShape() {
// para.output.dim = para.input.dim
typealias OpType = PoolOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func delogOutput() {
print("pool2d delog")
let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true)
let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true)
......@@ -35,6 +35,10 @@ class PreluParam<P: PrecisionType>: OpParam {
class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable{
func inputs() -> [Variant] {
return [para.alpha, para.input]
func inferShape() {
// para.output.dim = para.input.dim
......@@ -15,33 +15,37 @@
import Foundation
class ReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error {
throw error
let input: Texture<P>
var output: Texture<P>
let input: Texture<P>
var output: Texture<P>
class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable{
func inferShape() {
para.output.dim = para.input.dim
typealias OpType = ReluOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func inputs() -> [Variant] {
return [para.input]
func inferShape() {
para.output.dim = para.input.dim
typealias OpType = ReluOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
......@@ -17,7 +17,7 @@ import Foundation
struct TensorDesc {
let dims: [Int]
let dataType: VarTypeType
let dataLayout: DataLayout = DataLayout.NHWC()
let dataLayout: DataLayout = DataLayout.NCHW()
var NCHWDim: [Int] {
get {
if dims.count != 4 {
......@@ -53,7 +53,7 @@ struct TensorDesc {
init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) {
dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : 1 }
dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) }
dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType
......@@ -174,7 +174,7 @@ class Tensor<P: PrecisionType>: Tensorial {
fatalError(" not support !")
//TODO: release
// data.release()
var width: Int {
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册