提交 d4225149 编写于 作者: L liuruilong

Add PriorBox, NMS, ConvTranspose and PReLU ops

上级 1045c42f
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 50;
objects = {
/* Begin PBXBuildFile section */
FCEB6843212F00CC00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6842212F00CC00D2448E /* PreluKernel.metal */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
FCEB683F212F00CC00D2448E /* PreluKernel.metallib */ = {isa = PBXFileReference; explicitFileType = "archive.metal-library"; includeInIndex = 0; path = PreluKernel.metallib; sourceTree = BUILT_PRODUCTS_DIR; };
FCEB6842212F00CC00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXGroup section */
FCEB6838212F00CC00D2448E = {
isa = PBXGroup;
children = (
FCEB6841212F00CC00D2448E /* PreluKernel */,
FCEB6840212F00CC00D2448E /* Products */,
);
sourceTree = "<group>";
};
FCEB6840212F00CC00D2448E /* Products */ = {
isa = PBXGroup;
children = (
FCEB683F212F00CC00D2448E /* PreluKernel.metallib */,
);
name = Products;
sourceTree = "<group>";
};
FCEB6841212F00CC00D2448E /* PreluKernel */ = {
isa = PBXGroup;
children = (
FCEB6842212F00CC00D2448E /* PreluKernel.metal */,
);
path = PreluKernel;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
FCEB683E212F00CC00D2448E /* PreluKernel */ = {
isa = PBXNativeTarget;
buildConfigurationList = FCEB6846212F00CC00D2448E /* Build configuration list for PBXNativeTarget "PreluKernel" */;
buildPhases = (
FCEB683D212F00CC00D2448E /* Sources */,
);
buildRules = (
);
dependencies = (
);
name = PreluKernel;
productName = PreluKernel;
productReference = FCEB683F212F00CC00D2448E /* PreluKernel.metallib */;
productType = "com.apple.product-type.metal-library";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
FCEB6839212F00CC00D2448E /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 0940;
ORGANIZATIONNAME = orange;
TargetAttributes = {
FCEB683E212F00CC00D2448E = {
CreatedOnToolsVersion = 9.4.1;
};
};
};
buildConfigurationList = FCEB683C212F00CC00D2448E /* Build configuration list for PBXProject "PreluKernel" */;
compatibilityVersion = "Xcode 9.3";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = FCEB6838212F00CC00D2448E;
productRefGroup = FCEB6840212F00CC00D2448E /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
FCEB683E212F00CC00D2448E /* PreluKernel */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
FCEB683D212F00CC00D2448E /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FCEB6843212F00CC00D2448E /* PreluKernel.metal in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
FCEB6844212F00CC00D2448E /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
IPHONEOS_DEPLOYMENT_TARGET = 11.4;
MTL_ENABLE_DEBUG_INFO = YES;
SDKROOT = iphoneos;
};
name = Debug;
};
FCEB6845212F00CC00D2448E /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
IPHONEOS_DEPLOYMENT_TARGET = 11.4;
MTL_ENABLE_DEBUG_INFO = NO;
SDKROOT = iphoneos;
};
name = Release;
};
FCEB6847212F00CC00D2448E /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = Z5M2UUN5YV;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
FCEB6848212F00CC00D2448E /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = Z5M2UUN5YV;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
FCEB683C212F00CC00D2448E /* Build configuration list for PBXProject "PreluKernel" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCEB6844212F00CC00D2448E /* Debug */,
FCEB6845212F00CC00D2448E /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
FCEB6846212F00CC00D2448E /* Build configuration list for PBXNativeTarget "PreluKernel" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCEB6847212F00CC00D2448E /* Debug */,
FCEB6848212F00CC00D2448E /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = FCEB6839212F00CC00D2448E /* Project object */;
}
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:PreluKernel.xcodeproj">
</FileRef>
</Workspace>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>PreluKernel.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>0</integer>
</dict>
</dict>
</dict>
</plist>
//
// PreluKernel.metal
// PreluKernel
//
// Created by liuRuiLong on 2018/8/23.
// Copyright © 2018年 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
......@@ -62,10 +62,11 @@ struct MobileNet: Net{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String{
subscript(index: Int) -> String {
return contents[index]
}
}
let labels = PreWords.init(fileName: "synset")
func resultStr(res: [Float]) -> String {
......@@ -102,6 +103,166 @@ struct MobileNet_ssd_hand: Net{
fatalError()
}
/// Computes the area of a box given as `[xmin, ymin, xmax, ymax]`.
///
/// - Parameters:
///   - box: Corner coordinates; elements 0...3 are read.
///   - normalized: When `true` the area is `w * h`; otherwise one is added
///     to each side length before multiplying.
/// - Returns: The area, or `0.0` when the max corner lies below the min
///   corner on either axis.
func bboxArea(box: [Float32], normalized: Bool) -> Float32 {
    // Written as a negated "is inverted" test so NaN coordinates fall
    // through to the arithmetic below instead of returning 0.
    guard !(box[2] < box[0] || box[3] < box[1]) else {
        return 0.0
    }
    let width = box[2] - box[0]
    let height = box[3] - box[1]
    return normalized ? width * height : (width + 1) * (height + 1)
}
/// Computes the Jaccard overlap (intersection over union) of two boxes,
/// each given as `[xmin, ymin, xmax, ymax]`.
///
/// - Parameters:
///   - box1: First box; elements 0...3 are read.
///   - box2: Second box; elements 0...3 are read.
///   - normalized: Forwarded to `bboxArea` for both boxes. The intersection
///     area itself is always `w * h`, regardless of this flag.
/// - Returns: `0.0` when the boxes do not overlap on some axis, otherwise
///   `intersection / (area1 + area2 - intersection)`.
func jaccardOverLap(box1: [Float32], box2: [Float32], normalized: Bool) -> Float32 {
    let isDisjoint = box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
        box2[3] < box1[1]
    if isDisjoint {
        return 0.0
    }

    // Side lengths of the intersection rectangle.
    let overlapWidth = min(box1[2], box2[2]) - max(box1[0], box2[0])
    let overlapHeight = min(box1[3], box2[3]) - max(box1[1], box2[1])
    let intersection = overlapWidth * overlapHeight

    let firstArea = bboxArea(box: box1, normalized: normalized)
    let secondArea = bboxArea(box: box2, normalized: normalized)
    return intersection / (firstArea + secondArea - intersection)
}
/// Runs multi-class non-maximum suppression on the CPU over the network's
/// box and score outputs.
///
/// Reads the textures stored under `"box_coder_0.tmp_0"` (boxes) and
/// `"transpose_12.tmp_0"` (scores) in `paddleMobileRes`, applies a score
/// threshold, per-class NMS and a global top-k, and flattens the survivors.
///
/// - Parameter paddleMobileRes: Output textures keyed by variable name.
/// - Returns: Six floats per kept detection: `label, score` followed by the
///   four box values. Detections are grouped by ascending label and ordered
///   by descending score inside each label.
func fetchResult(paddleMobileRes: [String : Texture<Float32>]) -> [Float32] {
    // `?!` is a project-defined operator; presumably it aborts with the
    // message when the key is missing — confirm against its definition.
    let bbox = paddleMobileRes["box_coder_0.tmp_0"] ?! " no bbox "
    let scores = paddleMobileRes["transpose_12.tmp_0"] ?! " no scores "

    let scoreThreshold: Float32 = 0.01
    let nmsTopK = 400
    let keepTopK = 200
    let nmsEta: Float32 = 1.0
    let nmsThreshold: Float32 = 0.45

    let bboxArr = bbox.metalTexture.floatArray { (f) -> Float32 in
        return f
    }
    let scoresArr = scores.metalTexture.floatArray { (f) -> Float32 in
        return f
    }

    // The trailing numbers are the values the original author noted for
    // the model this was written against.
    let numOfOneC = (scores.originDim[2] + 3) / 4 // 480
    let cNumOfOneClass = numOfOneC * 4 // 1920
    let boxSize = bbox.originDim[2] // 4
    let classNum = scores.originDim[1] // 7
    let classNumOneTexture = classNum * 4 // 28

    // Regroup the scores so that each class occupies one contiguous run of
    // `cNumOfOneClass` values.
    var scoreFormatArr: [Float32] = []
    scoreFormatArr.reserveCapacity(classNum * cNumOfOneClass)
    for c in 0..<classNum {
        for n in 0..<numOfOneC {
            let from = n * classNumOneTexture + c * 4
            scoreFormatArr.append(contentsOf: scoresArr[from..<(from + 4)])
        }
    }

    // Per-class NMS. `selectedPerClass[label]` holds (boxIndex, score).
    var selectedPerClass: [[(Int, Float32)]] = []
    selectedPerClass.reserveCapacity(classNum)
    for classIdx in 0..<classNum {
        // Index the flat array with an explicit base offset. An ArraySlice
        // keeps its parent's indices, so subscripting a slice with 0-based
        // offsets is out of bounds for every class after the first.
        let classStart = classIdx * cNumOfOneClass
        var candidates: [(Float32, Int)] = []
        for offset in 0..<cNumOfOneClass {
            let score = scoreFormatArr[classStart + offset]
            if score > scoreThreshold {
                candidates.append((score, offset))
            }
        }
        candidates.sort { $0 > $1 }
        if candidates.count > nmsTopK {
            candidates.removeLast(candidates.count - nmsTopK)
        }

        // The adaptive threshold restarts for every class. With
        // `nmsEta == 1.0` it never changes, so this matters only if eta is
        // lowered later.
        var adaptiveThreshold = nmsThreshold
        var kept: [(Int, Float32)] = []
        for (score, idx) in candidates {
            let candidateBox = Array(bboxArr[(idx * boxSize)..<(idx * boxSize + 4)])
            var keep = true
            for (keptIdx, _) in kept {
                // Compare against the box that was already kept; the range
                // must start at `keptIdx`, not at `idx`.
                let keptBox = Array(bboxArr[(keptIdx * boxSize)..<(keptIdx * boxSize + 4)])
                let overlap = jaccardOverLap(box1: candidateBox, box2: keptBox, normalized: true)
                // Negated `<=` so that a NaN overlap rejects the candidate.
                if !(overlap <= adaptiveThreshold) {
                    keep = false
                    break
                }
            }
            if keep {
                kept.append((idx, score))
                if nmsEta < 1.0 && adaptiveThreshold > 0.5 {
                    adaptiveThreshold *= nmsEta
                }
            }
        }
        selectedPerClass.append(kept)
    }

    // Keep the globally best `keepTopK` detections across all classes.
    var scoreIndexPairs: [(Float32, (Int, Int))] = []
    for (label, kept) in selectedPerClass.enumerated() {
        for (boxIdx, score) in kept {
            scoreIndexPairs.append((score, (label, boxIdx)))
        }
    }
    scoreIndexPairs.sort { $0.0 > $1.0 }
    if scoreIndexPairs.count > keepTopK {
        scoreIndexPairs.removeLast(scoreIndexPairs.count - keepTopK)
    }

    // Regroup by label, storing the box index (`.1.1`) with its score.
    var newIndices: [Int : [(Int, Float32)]] = [:]
    for pair in scoreIndexPairs {
        let (label, boxIdx) = pair.1
        newIndices[label, default: []].append((boxIdx, pair.0))
    }

    // Emit in ascending label order so the result does not depend on
    // dictionary iteration order.
    var outputArr: [Float32] = []
    for label in newIndices.keys.sorted() {
        for (boxIdx, score) in newIndices[label] ?? [] {
            outputArr.append(Float32(label)) // label
            outputArr.append(score) // score
            outputArr.append(contentsOf: bboxArr[(boxIdx * boxSize)..<(boxIdx * boxSize + 4)])
        }
    }
    return outputArr
}
var preprocessKernel: CusomKernel
let dim = [1, 300, 300, 3]
let modelPath: String
......
......@@ -61,7 +61,6 @@ class ViewController: UIViewController {
executor?.clear()
program = nil
executor = nil
}
@IBAction func predictAct(_ sender: Any) {
......
//
// AppDelegate.swift
// paddle-mobile-unit-test
//
// Created by liuRuiLong on 2018/8/10.
// Copyright © 2018年 orange. All rights reserved.
//
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import UIKit
......
//
// ViewController.swift
// paddle-mobile-unit-test
//
// Created by liuRuiLong on 2018/8/10.
// Copyright © 2018年 orange. All rights reserved.
//
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import UIKit
import Metal
......
......@@ -69,6 +69,14 @@
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; };
FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; };
FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; };
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */; };
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */; };
FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */; };
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; };
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; };
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; };
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
......@@ -141,9 +149,17 @@
FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; };
FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; };
FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; };
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluKernel.swift; sourceTree = "<group>"; };
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeKernel.swift; sourceTree = "<group>"; };
FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = BatchNormKernel.metal; sourceTree = "<group>"; };
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = "<group>"; };
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = "<group>"; };
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = "<group>"; };
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; };
FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
......@@ -255,6 +271,8 @@
FCBCCC66212306B000D94F7E /* ConcatOp.swift */,
FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */,
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */,
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */,
FCEB684B212F093800D2448E /* PreluOp.swift */,
);
path = Operators;
sourceTree = "<group>";
......@@ -279,15 +297,15 @@
FC086BA520E67E8500D85EF7 /* Kernels */ = {
isa = PBXGroup;
children = (
FCDDC6CD212FE02100E5EF74 /* Base */,
FCEB6837212F00B100D2448E /* metal */,
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */,
FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */,
FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
FC1B186520ECF1C600678B91 /* ResizeKernel.swift */,
FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */,
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */,
FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */,
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */,
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */,
FCD04E6720F315020007374F /* PoolKernel.swift */,
FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */,
......@@ -299,6 +317,7 @@
FCBCCC68212306D300D94F7E /* ConcatKernel.swift */,
FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */,
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */,
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */,
);
path = Kernels;
sourceTree = "<group>";
......@@ -313,6 +332,27 @@
path = Base;
sourceTree = "<group>";
};
FCDDC6CD212FE02100E5EF74 /* Base */ = {
isa = PBXGroup;
children = (
FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
);
path = Base;
sourceTree = "<group>";
};
FCEB6837212F00B100D2448E /* metal */ = {
isa = PBXGroup;
children = (
FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
FCEB6849212F00DB00D2448E /* PreluKernel.metal */,
FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */,
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */,
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */,
);
path = metal;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
......@@ -417,6 +457,7 @@
FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */,
FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */,
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */,
FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */,
FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
......@@ -426,11 +467,15 @@
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */,
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */,
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */,
FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */,
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */,
FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */,
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
......@@ -456,6 +501,7 @@
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */,
FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */,
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */,
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
......@@ -468,6 +514,8 @@
FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */,
FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */,
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */,
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */,
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
......
......@@ -17,11 +17,13 @@ import Foundation
/// Value bundle created with an output shape, the flattened result values,
/// a timing figure and, optionally, a list of textures.
// NOTE(review): two `init` header lines appear below (with and without
// `inIntermediateResults`); only one can belong to the compiled file — confirm
// against the repository.
public class ResultHolder<P: PrecisionType> {
/// Dimensions passed in as `inDim`.
public let dim: [Int]
/// Result values passed in as `inResult`.
public let resultArr: [P]
/// Optional textures passed in as `inIntermediateResults`; `nil` by default.
public var intermediateResults: [Texture<P>]?
/// Value passed in as `inElapsedTime` (unit not visible here — TODO confirm).
public let elapsedTime: Double
public init(inDim: [Int], inResult: [P], inElapsedTime: Double) {
public init(inDim: [Int], inResult: [P], inElapsedTime: Double, inIntermediateResults: [Texture<P>]? = nil) {
dim = inDim
resultArr = inResult
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults
}
}
......@@ -116,7 +118,6 @@ public class Executor<P: PrecisionType> {
// self.ops[2].delogOutput()
let afterDate = Date.init()
guard let outputVar = self.program.scope.output() else {
......
......@@ -39,8 +39,19 @@ protocol OpParam {
static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputImage<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func outputBoxes<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputVariances<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T
static func inputAlpha<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
}
extension OpParam {
......@@ -58,16 +69,52 @@ extension OpParam {
return v
}
/// Resolves the `"Variances"` entry of `outputs` through `getFirstTensor`.
///
/// - Throws: Whatever `getFirstTensor` throws, unchanged.
static func outputVariances<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
    let variances: VarType = try getFirstTensor(key: "Variances", map: outputs, from: from)
    return variances
}
/// Resolves the `"Alpha"` entry of `inputs` through `getFirstTensor`.
///
/// - Throws: Whatever `getFirstTensor` throws, unchanged.
static func inputAlpha<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
    let alpha: VarType = try getFirstTensor(key: "Alpha", map: inputs, from: from)
    return alpha
}
/// Resolves the `"Image"` entry of `inputs` through `getFirstTensor`.
///
/// - Throws: Whatever `getFirstTensor` throws, unchanged.
static func inputImage<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
    let image: VarType = try getFirstTensor(key: "Image", map: inputs, from: from)
    return image
}
/// Resolves the `"X"` entry of `inputs` through `getFirstTensor`.
///
/// - Throws: Whatever `getFirstTensor` throws, unchanged.
static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
    let x: VarType = try getFirstTensor(key: "X", map: inputs, from: from)
    return x
}
/// Resolves the `"Boxes"` entry of `outputs` through `getFirstTensor`.
///
/// - Throws: Whatever `getFirstTensor` throws, unchanged.
static func outputBoxes<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
    let boxes: VarType = try getFirstTensor(key: "Boxes", map: outputs, from: from)
    return boxes
}
static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from)
......
......@@ -14,10 +14,12 @@
import Foundation
class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
......@@ -29,6 +31,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
......
......@@ -61,6 +61,7 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
typealias OpType = ConvAddOp<P>
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
......
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
/// Parameter type used by `ConvTransposeOp` / `ConvTransposeKernel`.
/// It adds no state of its own; construction is delegated entirely to
/// `ConvParam`.
class ConvTransposeParam<P: PrecisionType>: ConvParam<P> {
    typealias ParamPrecisionType = P

    /// Forwards to `ConvParam.init`; any error it throws propagates unchanged.
    required init(opDesc: OpDesc, inScope: Scope) throws {
        try super.init(opDesc: opDesc, inScope: inScope)
    }
}
/// Operator wrapper that runs `ConvTransposeKernel` with a
/// `ConvTransposeParam`.
class ConvTransposeOp<P: PrecisionType>: Operator<ConvTransposeKernel<P>, ConvTransposeParam<P>>, Runable, Creator, InferShaperable {
    typealias OpType = ConvTransposeOp<P>

    /// Currently a no-op: the output dimensions are left untouched.
    func inferShape() {
        // para.output.dim = para.input.dim
    }

    /// Encodes the kernel into `buffer`; errors from `compute` propagate
    /// unchanged. `device` is not used here.
    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
        try kernel.compute(commandBuffer: buffer, param: para)
    }

    /// Prints a marker line, then logs the input and output textures via
    /// `logDesc`.
    func delogOutput() {
        print("conv transpose delog")
        let inputTexture = para.input.metalTexture
        let _: P? = inputTexture.logDesc(header: "conv transpose input: ", stridable: true)
        let outputTexture = para.output.metalTexture
        let _: P? = outputTexture.logDesc(header: "conv transpose output: ", stridable: true)
    }
}
......@@ -110,6 +110,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
......
......@@ -13,12 +13,20 @@
limitations under the License. */
import Foundation
import MetalPerformanceShaders
class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddParam<P>) {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x1")
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3")
}
param.output.initTexture(device: device, transpose: [0, 3, 1, 2])
let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
......@@ -33,8 +41,6 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
......
......@@ -13,6 +13,7 @@
limitations under the License. */
import Foundation
import MetalPerformanceShaders
struct ConvBNReluTestParam: TestParam {
let inputTexture: MTLTexture
......@@ -24,6 +25,7 @@ struct ConvBNReluTestParam: TestParam {
let newBiaseBuffer: MTLBuffer
let filterSize: (width: Int, height: Int, channel: Int)
init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
inputTexture = inInputTexture
outputTexture = inOutputTexture
metalParam = inMetalParam
......
......@@ -14,7 +14,6 @@
import Foundation
public struct MetalConvParam {
let offsetX: Int16
let offsetY: Int16
......@@ -27,7 +26,14 @@ public struct MetalConvParam {
class ConvKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvParam<P>) {
super.init(device: device, inFunctionName: "conv_add_1x1")
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_1x1")
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_3x3")
} else {
super.init(device: device, inFunctionName: "conv_3x3")
}
let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0])
let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1])
let offsetZ = 0.0
......@@ -49,3 +55,5 @@ class ConvKernel<P: PrecisionType>: Kernel, Computable {
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
/// Eight `UInt16` settings handed to the `conv_transpose` compute function
/// by `ConvTransposeKernel` via `setBytes`. Field order is part of the byte
/// layout and must not be rearranged.
struct MetalConvTransposeParam {
    /// Filter width.
    let kernelW: UInt16
    /// Filter height.
    let kernelH: UInt16
    /// First stride component.
    let strideX: UInt16
    /// Second stride component.
    let strideY: UInt16
    /// First padding component.
    let paddingX: UInt16
    /// Second padding component.
    let paddingY: UInt16
    /// First dilation component.
    let dilationX: UInt16
    /// Second dilation component.
    let dilationY: UInt16
}
/// Kernel wrapper around the `conv_transpose` compute function.
class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable {
    /// Settings uploaded with `setBytes` on every `compute` call; filled in
    /// by `init`.
    var metalParam: MetalConvTransposeParam!

    /// Selects the `conv_transpose` function and captures filter size,
    /// stride, padding and dilation from `param`. Each value is converted
    /// with `UInt16(...)`, which traps if it does not fit.
    required init(device: MTLDevice, param: ConvTransposeParam<P>) {
        super.init(device: device, inFunctionName: "conv_transpose")
        metalParam = MetalConvTransposeParam(
            kernelW: UInt16(param.filter.width),
            kernelH: UInt16(param.filter.height),
            strideX: UInt16(param.stride[0]),
            strideY: UInt16(param.stride[1]),
            paddingX: UInt16(param.paddings[0]),
            paddingY: UInt16(param.paddings[1]),
            dilationX: UInt16(param.dilations[0]),
            dilationY: UInt16(param.dilations[1])
        )
    }

    /// Encodes one dispatch into `commandBuffer`.
    ///
    /// Bindings: texture 0 = input, texture 1 = output,
    /// buffer 0 = `metalParam` bytes, buffer 1 = filter buffer.
    ///
    /// - Throws: `PaddleMobileError.predictError` when no compute encoder
    ///   can be created.
    func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam<P>) throws {
        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
            throw PaddleMobileError.predictError(message: " encoder is nil")
        }
        let output = param.output.metalTexture
        computeEncoder.setTexture(param.input.metalTexture, index: 0)
        computeEncoder.setTexture(output, index: 1)
        computeEncoder.setBytes(&metalParam, length: MemoryLayout<MetalConvTransposeParam>.size, index: 0)
        computeEncoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
        computeEncoder.dispatch(computePipline: pipline, outTexture: output)
        computeEncoder.endEncoding()
    }
}
//
// PreluKernel.swift
// paddle-mobile
//
// Created by liuRuiLong on 2018/8/24.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
/// Encodes one of the `prelu_*` Metal compute functions, chosen by `param.mode`.
class PreluKernel<P: PrecisionType>: Kernel, Computable {
    /// Picks the shader variant from the op's `mode` attribute.
    ///
    /// `"channel"` and `"element"` map to their dedicated shaders; any other
    /// value falls back to `prelu_other`.
    required init(device: MTLDevice, param: PreluParam<P>) {
        let functionName: String
        switch param.mode {
        case "channel":
            functionName = "prelu_channel"
        case "element":
            functionName = "prelu_element"
        default:
            functionName = "prelu_other"
        }
        super.init(device: device, inFunctionName: functionName)
    }

    /// Encodes one dispatch: input at texture 0, output at texture 1, alpha at buffer 0.
    ///
    /// - Throws: `PaddleMobileError.predictError` when no compute encoder is available.
    func compute(commandBuffer: MTLCommandBuffer, param: PreluParam<P>) throws {
        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
            throw PaddleMobileError.predictError(message: " encoder is nil")
        }
        let destination = param.output.metalTexture
        computeEncoder.setBuffer(param.alpha.buffer, offset: 0, index: 0)
        computeEncoder.setTexture(param.input.metalTexture, index: 0)
        computeEncoder.setTexture(destination, index: 1)
        computeEncoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
        computeEncoder.endEncoding()
    }
}
......@@ -14,18 +14,89 @@
import Foundation
/// Host-side parameter block uploaded to the prior-box compute kernel.
///
/// The raw bytes of this struct are bound with `setBytes`, so field order and
/// field types must not change.
struct PriorBoxMetalParam {
    /// Offset added to the grid position before scaling by the step.
    let offset: Float32
    /// Horizontal step.
    let stepWidth: Float32
    /// Vertical step.
    let stepHeight: Float32
    /// Minimum box size.
    let minSize: Float32
    /// Maximum box size.
    let maxSize: Float32
    /// Width of the input image.
    let imageWidth: Float32
    /// Height of the input image.
    let imageHeight: Float32
    /// Whether box coordinates are clipped.
    let clip: Bool
    /// Number of priors.
    let numPriors: UInt32
    /// Number of aspect ratios.
    let aspecRatiosSize: UInt32
    /// Number of minimum sizes.
    let minSizeSize: UInt32
    /// Number of maximum sizes.
    let maxSizeSize: UInt32
}
/// Encodes the `prior_box` Metal compute function.
class PriorBoxKernel<P: PrecisionType>: Kernel, Computable {
    /// Scalar parameters uploaded to the shader; populated in `init`.
    var metalParam: PriorBoxMetalParam!

    /// Expanded aspect-ratio list built in `init`. Its count is the value
    /// stored in `metalParam.aspecRatiosSize`, so this is the list that must
    /// be bound to the shader.
    private var expandedAspectRatios: [Float32] = []

    /// Prepares output textures, derives the step when it is unset, expands
    /// the aspect ratios and fills `metalParam`.
    ///
    /// - Parameters:
    ///   - device: Device used to create the output textures.
    ///   - param: Prior-box op parameters; `stepW`, `stepH` and
    ///     `newAspectRatios` are updated in place.
    required init(device: MTLDevice, param: PriorBoxParam<P>) {
        // The shader entry point is named `prior_box`.
        super.init(device: device, inFunctionName: "prior_box")
        param.output.initTexture(device: device, transpose: [2, 0, 1, 3])
        param.outputVariances.initTexture(device: device, transpose: [2, 0, 1, 3])

        let imageWidth = Float32(param.inputImage.originDim[3])
        let imageHeight = Float32(param.inputImage.originDim[2])
        // The feature-map size comes from `input`, not from the image;
        // reading it from the image made the derived step always 1.
        let featureWidth = Float32(param.input.originDim[3])
        let featureHeight = Float32(param.input.originDim[2])

        if param.stepW == 0 || param.stepH == 0 {
            param.stepW = imageWidth / featureWidth
            param.stepH = imageHeight / featureHeight
        }

        // Start from ratio 1, then add each new ratio (and its reciprocal when
        // flipping). Ratios already present are skipped entirely so that a
        // duplicate does not contribute an extra flipped entry.
        let epsilon = 1e-6
        var ratios: [Float32] = [1.0]
        for ar in param.aspectRatios {
            let alreadyExists = ratios.contains { abs(Double(ar) - Double($0)) < epsilon }
            if alreadyExists {
                continue
            }
            ratios.append(ar)
            if param.flip {
                ratios.append(1.0 / ar)
            }
        }
        expandedAspectRatios = ratios
        param.newAspectRatios = ratios

        let aspectRatiosSize = uint(ratios.count)
        let maxSizeSize = uint(param.maxSizes.count)
        let minSizeSize = uint(param.minSizes.count)
        let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize
        let minSize = param.minSizes.last ?? 0.0
        let maxSize = param.maxSizes.last ?? 0.0

        metalParam = PriorBoxMetalParam(
            offset: param.offset,
            stepWidth: param.stepW,
            stepHeight: param.stepH,
            minSize: minSize,
            maxSize: maxSize,
            imageWidth: imageWidth,
            imageHeight: imageHeight,
            clip: param.clip,
            numPriors: numPriors,
            aspecRatiosSize: aspectRatiosSize,
            minSizeSize: minSizeSize,
            maxSizeSize: maxSizeSize
        )
    }

    /// Encodes one dispatch of the kernel into `commandBuffer`.
    ///
    /// - Throws: `PaddleMobileError.predictError` when the parameter block is
    ///   missing or a compute encoder cannot be created.
    func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam<P>) throws {
        // Unwrap before creating the encoder so a failure never leaves an
        // encoder open, and so the bytes bound are those of the struct itself.
        guard var shaderParam = metalParam else {
            throw PaddleMobileError.predictError(message: " metalParam is nil")
        }
        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
            throw PaddleMobileError.predictError(message: " encode is nil")
        }
        encoder.setTexture(param.input.metalTexture, index: 0)
        encoder.setTexture(param.output.metalTexture, index: 1)
        encoder.setTexture(param.outputVariances.metalTexture, index: 2)
        encoder.setBytes(&shaderParam, length: MemoryLayout<PriorBoxMetalParam>.size, index: 0)
        // Bind the expanded list: the shader indexes it up to `aspecRatiosSize`.
        encoder.setBytes(expandedAspectRatios, length: MemoryLayout<Float32>.size * expandedAspectRatios.count, index: 1)
        encoder.setBytes(param.variances, length: MemoryLayout<Float32>.size * param.variances.count, index: 2)
        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
        encoder.endEncoding()
    }
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// Half-precision batch norm: out = in * newScale[slice] + newBias[slice],
// applied to one texel per thread.
kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
                           texture2d_array<half, access::write> outTexture [[texture(1)]],
                           const device half4 * newScale [[buffer(0)]],
                           const device half4 * newBias [[buffer(1)]],
                           uint3 gid [[thread_position_in_grid]]) {
    // Threads that fall outside the output extent have nothing to do.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const uint slice = gid.z;
    const half4 texel = inTexture.read(gid.xy, slice);
    half4 normalized = texel * newScale[slice] + newBias[slice];
    outTexture.write(normalized, gid.xy, slice);
}
// Single-precision batch norm: out = in * newScale[slice] + newBias[slice],
// applied to one texel per thread.
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
                      texture2d_array<float, access::write> outTexture [[texture(1)]],
                      const device float4 * newScale [[buffer(0)]],
                      const device float4 * newBias [[buffer(1)]],
                      uint3 gid [[thread_position_in_grid]]) {
    // Threads that fall outside the output extent have nothing to do.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const uint slice = gid.z;
    const float4 texel = inTexture.read(gid.xy, slice);
    float4 normalized = texel * newScale[slice] + newBias[slice];
    outTexture.write(normalized, gid.xy, slice);
}
......@@ -314,6 +314,252 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample>
outTexture.write(output, gid.xy, gid.z);
}
// Depthwise 3x3 convolution followed by bias add, scale/shift and ReLU.
// Each thread produces one output texel; only the input slice matching the
// output slice is read.
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                                   texture2d_array<float, access::write> outTexture [[texture(1)]],
                                                   constant MetalConvParam &param [[buffer(0)]],
                                                   const device float *weights [[buffer(1)]],
                                                   const device float4 *biase [[buffer(2)]],
                                                   const device float4 *new_scale [[buffer(3)]],
                                                   const device float4 *new_biase [[buffer(4)]],
                                                   uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const uint slice = gid.z;
    const ushort2 step = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
    // Out-of-range taps sample as zero.
    constexpr sampler nearest(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    // Four scalar filters of nine taps each per slice, one filter per lane.
    const uint weightBase = slice * tapCount * 4;

    // Gather the 3x3 neighbourhood row by row (top-left first).
    float4 taps[9];
    uint next = 0;
    for (int dy = -1; dy <= 1; ++dy) {
        for (int dx = -1; dx <= 1; ++dx) {
            taps[next++] = inTexture.sample(nearest, float2(center.x + dx, center.y + dy), slice);
        }
    }

    float4 acc = float4(0.0);
    for (uint j = 0; j < tapCount; ++j) {
        const float4 tapWeights = float4(weights[weightBase + 0 * tapCount + j],
                                         weights[weightBase + 1 * tapCount + j],
                                         weights[weightBase + 2 * tapCount + j],
                                         weights[weightBase + 3 * tapCount + j]);
        acc += taps[j] * tapWeights;
    }

    acc = fmax((acc + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
    outTexture.write(acc, gid.xy, gid.z);
}
// Parameter block read by the `conv_transpose` kernel from buffer(0).
struct MetalConvTransposeParam{
// Filter extent; these bound the kernel's w and h loops.
ushort kernelW;
ushort kernelH;
// Stride: the output-to-input coordinate mapping divides by these.
ushort strideX;
ushort strideY;
// Padding: added to the output coordinate before dividing by the stride.
ushort paddingX;
ushort paddingY;
// Dilation: multiplies the filter tap index.
ushort dilationX;
ushort dilationY;
};
// Transposed convolution. Each thread produces one output texel (four output
// channels) by gathering every input texel / filter tap pair that maps onto it.
//
// For output coordinate o, filter tap k contributes input coordinate
//   i = (o + padding - k * dilation) / stride
// only when the numerator is non-negative and exactly divisible by the stride.
kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                           texture2d_array<float, access::write> outTexture [[texture(1)]],
                           constant MetalConvTransposeParam &param [[buffer(0)]],
                           const device float4 *weights [[buffer(1)]],
                           uint3 gid [[thread_position_in_grid]]){
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    const int input_array_size = inTexture.get_array_size();
    const int input_width = int(inTexture.get_width());
    const int input_height = int(inTexture.get_height());
    const int stride_x = int(param.strideX);
    const int stride_y = int(param.strideY);

    // float4 entries per output channel; four output channels per output slice.
    const uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
    const uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    // Must start at zero: the loops below only accumulate.
    float4 output = float4(0.0);

    for (int w = 0; w < param.kernelW; ++w) {
        // Signed arithmetic: the unsigned form wraps around when the tap lies
        // left of the output position, defeating the range check.
        const int numerator_x = int(gid.x) + int(param.paddingX) - w * int(param.dilationX);
        if (numerator_x < 0 || numerator_x % stride_x != 0) {
            continue;
        }
        const int input_x = numerator_x / stride_x;
        if (input_x >= input_width) {
            continue;
        }

        for (int h = 0; h < param.kernelH; ++h) {
            const int numerator_y = int(gid.y) + int(param.paddingY) - h * int(param.dilationY);
            if (numerator_y < 0 || numerator_y % stride_y != 0) {
                continue;
            }
            const int input_y = numerator_y / stride_y;
            if (input_y >= input_height) {
                continue;
            }

            const uint kernel_index = (w * param.kernelH + h) * input_array_size;
            for (int slice = 0; slice < input_array_size; ++slice) {
                const uint tap = kernel_stride_z + kernel_index + slice;
                const float4 input = inTexture.sample(sample, float2(input_x, input_y), slice);
                // One dot product per output channel of this slice.
                output.x += dot(input, weights[tap + 0 * kernel_one_output_slice]);
                output.y += dot(input, weights[tap + 1 * kernel_one_output_slice]);
                output.z += dot(input, weights[tap + 2 * kernel_one_output_slice]);
                output.w += dot(input, weights[tap + 3 * kernel_one_output_slice]);
            }
        }
    }

    outTexture.write(output, gid.xy, gid.z);
}
// conv
#pragma mark -- conv
// Dense 3x3 convolution. Each thread produces one output texel (four output
// channels), accumulating over every input slice and all nine taps.
kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                     texture2d_array<float, access::write> outTexture [[texture(1)]],
                     constant MetalConvParam &param [[buffer(0)]],
                     const device float4 *weights [[buffer(1)]],
                     uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const ushort2 step = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
    // Out-of-range taps sample as zero.
    constexpr sampler nearest(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    const uint sliceCount = inTexture.get_array_size();
    // float4 weight entries belonging to one output channel.
    const uint channelStride = tapCount * sliceCount;
    const uint weightBase = gid.z * channelStride * 4;

    float4 acc = float4(0.0);
    float4 taps[9];
    for (uint i = 0; i < sliceCount; ++i) {
        // Gather the 3x3 neighbourhood of slice i row by row (top-left first).
        uint next = 0;
        for (int dy = -1; dy <= 1; ++dy) {
            for (int dx = -1; dx <= 1; ++dx) {
                taps[next++] = inTexture.sample(nearest, float2(center.x + dx, center.y + dy), i);
            }
        }

        for (uint j = 0; j < tapCount; ++j) {
            const uint at = weightBase + j * sliceCount + i;
            acc.x += dot(taps[j], weights[at + 0 * channelStride]);
            acc.y += dot(taps[j], weights[at + 1 * channelStride]);
            acc.z += dot(taps[j], weights[at + 2 * channelStride]);
            acc.w += dot(taps[j], weights[at + 3 * channelStride]);
        }
    }

    outTexture.write(acc, gid.xy, gid.z);
}
// Depthwise 3x3 convolution. Each thread produces one output texel and reads
// only the input slice that matches its output slice.
kernel void depthwise_conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                               texture2d_array<float, access::write> outTexture [[texture(1)]],
                               constant MetalConvParam &param [[buffer(0)]],
                               const device float *weights [[buffer(1)]],
                               uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const uint slice = gid.z;
    const ushort2 step = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
    // Out-of-range taps sample as zero.
    constexpr sampler nearest(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    // Four scalar filters of nine taps each per slice, one filter per lane.
    const uint weightBase = slice * tapCount * 4;

    // Gather the 3x3 neighbourhood row by row (top-left first).
    float4 taps[9];
    uint next = 0;
    for (int dy = -1; dy <= 1; ++dy) {
        for (int dx = -1; dx <= 1; ++dx) {
            taps[next++] = inTexture.sample(nearest, float2(center.x + dx, center.y + dy), slice);
        }
    }

    float4 acc = float4(0.0);
    for (uint j = 0; j < tapCount; ++j) {
        const float4 tapWeights = float4(weights[weightBase + 0 * tapCount + j],
                                         weights[weightBase + 1 * tapCount + j],
                                         weights[weightBase + 2 * tapCount + j],
                                         weights[weightBase + 3 * tapCount + j]);
        acc += taps[j] * tapWeights;
    }

    outTexture.write(acc, gid.xy, gid.z);
}
// Dense 1x1 convolution. Each thread produces one output texel (four output
// channels), accumulating one dot product per input slice and channel.
kernel void conv_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                     texture2d_array<float, access::write> outTexture [[texture(1)]],
                     constant MetalConvParam &param [[buffer(0)]],
                     const device float4 *weights [[buffer(1)]],
                     uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const ushort2 step = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
    // Out-of-range taps sample as zero.
    constexpr sampler nearest(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 1;
    const uint sliceCount = inTexture.get_array_size();
    // float4 weight entries belonging to one output channel.
    const uint channelStride = tapCount * sliceCount;
    const uint weightBase = gid.z * channelStride * 4;
    const float2 samplePos = float2(center.x, center.y);

    float4 acc = float4(0.0);
    for (uint i = 0; i < sliceCount; ++i) {
        const float4 texel = inTexture.sample(nearest, samplePos, i);
        const uint at = weightBase + i;
        acc.x += dot(texel, weights[at + 0 * channelStride]);
        acc.y += dot(texel, weights[at + 1 * channelStride]);
        acc.z += dot(texel, weights[at + 2 * channelStride]);
        acc.w += dot(texel, weights[at + 3 * channelStride]);
    }

    outTexture.write(acc, gid.xy, gid.z);
}
#pragma mark - convAdd
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
......@@ -357,7 +603,61 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
// Dense 3x3 convolution followed by a per-slice bias add. Each thread
// produces one output texel (four output channels).
// `new_scale` and `new_biase` are part of the signature but are not read here.
kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                         texture2d_array<float, access::write> outTexture [[texture(1)]],
                         constant MetalConvParam &param [[buffer(0)]],
                         const device float4 *weights [[buffer(1)]],
                         const device float4 *biase [[buffer(2)]],
                         const device float4 *new_scale [[buffer(3)]],
                         const device float4 *new_biase [[buffer(4)]],
                         uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const ushort2 step = ushort2(param.strideX, param.strideY);
    const ushort2 center = ushort2(gid.xy) * step + ushort2(param.offsetX, param.offsetY);
    // Out-of-range taps sample as zero.
    constexpr sampler nearest(coord::pixel, filter::nearest, address::clamp_to_zero);

    const uint tapCount = 9;
    const uint sliceCount = inTexture.get_array_size();
    // float4 weight entries belonging to one output channel.
    const uint channelStride = tapCount * sliceCount;
    const uint weightBase = gid.z * channelStride * 4;

    float4 acc = float4(0.0);
    float4 taps[9];
    for (uint i = 0; i < sliceCount; ++i) {
        // Gather the 3x3 neighbourhood of slice i row by row (top-left first).
        uint next = 0;
        for (int dy = -1; dy <= 1; ++dy) {
            for (int dx = -1; dx <= 1; ++dx) {
                taps[next++] = inTexture.sample(nearest, float2(center.x + dx, center.y + dy), i);
            }
        }

        for (uint j = 0; j < tapCount; ++j) {
            const uint at = weightBase + j * sliceCount + i;
            acc.x += dot(taps[j], weights[at + 0 * channelStride]);
            acc.y += dot(taps[j], weights[at + 1 * channelStride]);
            acc.z += dot(taps[j], weights[at + 2 * channelStride]);
            acc.w += dot(taps[j], weights[at + 3 * channelStride]);
        }
    }

    acc = acc + biase[gid.z];
    outTexture.write(acc, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
......@@ -395,6 +695,7 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
......@@ -36,18 +36,6 @@ kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z);
}
// Half-precision ReLU: clamps every lane of the texel to be non-negative.
// The max is evaluated in float and converted back to half on write.
kernel void relu(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                 texture2d_array<half, access::write> outTexture [[texture(1)]],
                 uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    // The texel is fetched with read(), so no sampler is needed.
    const half4 input = inTexture.read(gid.xy, gid.z);
    const float4 rectified = fmax(float4(input), 0.0);
    outTexture.write(half4(rectified), gid.xy, gid.z);
}
kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *biasTerms [[buffer(0)]],
......@@ -60,18 +48,6 @@ kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[text
outTexture.write(input, gid.xy, gid.z);
}
// Half-precision batch norm: out = in * newScale[slice] + newBias[slice],
// applied to one texel per thread.
kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)]],
                      texture2d_array<half, access::write> outTexture [[texture(1)]],
                      const device half4 * newScale [[buffer(0)]],
                      const device half4 * newBias [[buffer(1)]],
                      uint3 gid [[thread_position_in_grid]]) {
    // Threads that fall outside the output extent have nothing to do.
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) {
        return;
    }

    const uint slice = gid.z;
    const half4 texel = inTexture.read(gid.xy, slice);
    half4 normalized = texel * newScale[slice] + newBias[slice];
    outTexture.write(normalized, gid.xy, slice);
}
//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
// texture2d_array<half, access::write> outTexture [[texture(1)]],
......@@ -230,76 +206,6 @@ kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture
outTexture.write(rr, gid.xy, gid.z);
}
// Prior-box kernel taking only an input and an output texture.
// Each thread writes one box (x-min, y-min, x-max, y-max, normalised by the
// image size) to outTexture at its own grid position.
//
// NOTE(review): every configuration value used below (sizes, clip flag, image
// dimensions, steps, offset, aspect ratios) is a local that is declared but
// never assigned before it is read, so the values the kernel computes with are
// indeterminate. There is also no bounds check of gid against outTexture.
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
int max_sizes_size;
float max_sizes[2];
bool clip;
float img_width;
float img_height;
float step_width;
float step_height;
float offset;
float aspect_ratios[2];
int aspect_ratios_size;
// Box centre derived from the grid position.
float center_x = (gid.x + offset) * step_width;
// NOTE(review): uses step_width for the y axis; step_height is never read — confirm intent.
float center_y = (gid.y + offset) * step_width;
float box_width, box_height;
int min_sizes_size;
float min_sizes[2];
float min_size;
float max_size;
// Slices below aspect_ratios_size: one box per aspect ratio, sized from min_size.
if (gid.z < aspect_ratios_size) {
float ar = aspect_ratios[gid.z];
// Half extents of the box.
box_width = min_size * sqrt(ar) / 2;
box_height = min_size / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / img_width;
box.y = (center_y - box_height) / img_height;
box.z = (center_x + box_width) / img_width;
box.w = (center_y + box_height) / img_height;
float4 res;
// Optionally clamp the coordinates to [0, 1].
if (clip) {
res = min(max(box, 0.0), 1.0);
} else {
res = box;
}
outTexture.write(res, gid.xy, gid.z);
} else if (gid.z >= aspect_ratios_size) {
// Remaining slices: a square box sized from sqrt(min_size * max_size).
// max_index is computed but not used afterwards.
int max_index = gid.z - aspect_ratios_size;
if (max_sizes_size > 0 && min_sizes_size > 0) {
box_width = box_height = sqrt(min_size * max_size) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / img_width;
max_box.y = (center_y - box_height) / img_height;
max_box.z = (center_x + box_width) / img_width;
max_box.w = (center_y + box_height) / img_height;
float4 res;
if (clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
// NOTE(review): writes max_box, so the clipped value in res is discarded — confirm intent.
outTexture.write(max_box, gid.xy, gid.z);
}
}
}
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// PReLU with one slope per channel: out = in > 0 ? in : alpha * in.
// alpha holds one float4 (four channel slopes) per texture slice.
kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                          texture2d_array<float, access::write> outTexture [[texture(1)]],
                          const device float4 *alpha [[buffer(0)]],
                          uint3 gid [[thread_position_in_grid]]){
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
    // sample() on an array texture takes a float2 coordinate plus the slice index.
    const float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
    const float4 slope = alpha[gid.z];

    // Each lane is written to its own component; negative inputs are scaled
    // by the slope rather than replaced by it.
    float4 output;
    output.x = input.x > 0 ? input.x : slope.x * input.x;
    output.y = input.y > 0 ? input.y : slope.y * input.y;
    output.z = input.z > 0 ? input.z : slope.z * input.z;
    output.w = input.w > 0 ? input.w : slope.w * input.w;
    outTexture.write(output, gid.xy, gid.z);
}
// PReLU with one slope per element: out = in > 0 ? in : alpha * in.
// alpha is indexed as (y * width + x) * arraySize + slice, one float4 per texel.
kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                          texture2d_array<float, access::write> outTexture [[texture(1)]],
                          const device float4 *alpha [[buffer(0)]],
                          uint3 gid [[thread_position_in_grid]]){
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
    // sample() on an array texture takes a float2 coordinate plus the slice index.
    const float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);

    const uint alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
    const float4 slope = alpha[alpha_to + gid.z];

    // Each lane is written to its own component; negative inputs are scaled
    // by the slope rather than replaced by it.
    float4 output;
    output.x = input.x > 0 ? input.x : slope.x * input.x;
    output.y = input.y > 0 ? input.y : slope.y * input.y;
    output.z = input.z > 0 ? input.z : slope.z * input.z;
    output.w = input.w > 0 ? input.w : slope.w * input.w;
    outTexture.write(output, gid.xy, gid.z);
}
// PReLU with a single shared slope: out = in > 0 ? in : alpha[0] * in.
kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                        texture2d_array<float, access::write> outTexture [[texture(1)]],
                        const device float *alpha [[buffer(0)]],
                        uint3 gid [[thread_position_in_grid]]){
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
    // sample() on an array texture takes a float2 coordinate plus the slice index.
    const float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
    const float slope = alpha[0];

    // Each lane is written to its own component; negative inputs are scaled
    // by the slope rather than replaced by it.
    float4 output;
    output.x = input.x > 0 ? input.x : slope * input.x;
    output.y = input.y > 0 ? input.y : slope * input.y;
    output.z = input.z > 0 ? input.z : slope * input.z;
    output.w = input.w > 0 ? input.w : slope * input.w;
    outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// Parameter block read by the `prior_box` kernel from buffer(0).
struct PriorBoxMetalParam {
// Added to the grid position before it is scaled by the step.
float offset;
// Scale from grid position to box centre, per axis.
float stepWidth;
float stepHeight;
// Box sizes used to derive the box extents.
float minSize;
float maxSize;
// Box coordinates are divided by these.
float imageWidth;
float imageHeight;
// When true, box coordinates are clamped to [0, 1].
bool clip;
// Slices below this count are the ones the kernel's variance step applies to.
uint numPriors;
// Slices below this count produce one box per aspect ratio.
uint aspecRatiosSize;
// Not read by the kernel.
uint minSizeSize;
// The min/max-size box is only produced when this is non-zero.
uint maxSizeSize;
};
// Prior-box generation. Each thread handles one (x, y, slice) position:
//  - slices below aspecRatiosSize get a box sized from minSize and the
//    slice's aspect ratio;
//  - remaining slices get a square box sized from sqrt(minSize * maxSize),
//    provided at least one max size was configured;
//  - slices below numPriors also get the variance vector written to
//    varianceTexture.
// Boxes are (x-min, y-min, x-max, y-max) normalised by the image size.
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
                      texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
                      texture2d_array<float, access::write> varianceTexture [[texture(2)]],
                      constant PriorBoxMetalParam &param [[buffer(0)]],
                      const device float *aspect_ratios [[buffer(1)]],
                      const device float4 *variances [[buffer(2)]],
                      uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outBoxTexture.get_width() ||
        gid.y >= outBoxTexture.get_height() ||
        gid.z >= outBoxTexture.get_array_size()) return;

    const float center_x = (gid.x + param.offset) * param.stepWidth;
    const float center_y = (gid.y + param.offset) * param.stepHeight;
    const float2 center = float2(center_x, center_y);
    const float2 image_size = float2(param.imageWidth, param.imageHeight);

    bool has_box = false;
    float2 half_extent = float2(0.0);
    if (gid.z < param.aspecRatiosSize) {
        const float ar = aspect_ratios[gid.z];
        half_extent = float2(param.minSize * sqrt(ar) / 2,
                             param.minSize / sqrt(ar) / 2);
        has_box = true;
    } else if (param.maxSizeSize > 0) {
        half_extent = float2(sqrt(param.minSize * param.maxSize) / 2);
        has_box = true;
    }

    if (has_box) {
        float4 box;
        box.xy = (center - half_extent) / image_size;
        box.zw = (center + half_extent) / image_size;
        // Write the clipped box in both branches; previously the max-size
        // branch wrote the unclipped value.
        const float4 res = param.clip ? min(max(box, 0.0), 1.0) : box;
        outBoxTexture.write(res, gid.xy, gid.z);
    }

    // The variance was computed but never stored; write it out, staying
    // inside the variance texture's own extent.
    if (gid.z < param.numPriors &&
        gid.x < varianceTexture.get_width() &&
        gid.y < varianceTexture.get_height() &&
        gid.z < varianceTexture.get_array_size()) {
        const float4 variances_output = variances[0];
        varianceTexture.write(variances_output, gid.xy, gid.z);
    }
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// Half-precision ReLU: clamps every lane of the texel to be non-negative.
// The max is evaluated in float and converted back to half on write.
kernel void relu_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
                      texture2d_array<half, access::write> outTexture [[texture(1)]],
                      uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    // The texel is fetched with read(), so no sampler is needed.
    const half4 input = inTexture.read(gid.xy, gid.z);
    const float4 rectified = fmax(float4(input), 0.0);
    outTexture.write(half4(rectified), gid.xy, gid.z);
}
// Single-precision ReLU: clamps every lane of the texel to be non-negative.
kernel void relu(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                 texture2d_array<float, access::write> outTexture [[texture(1)]],
                 uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    // The texel is fetched with read(), so no sampler is needed; the value is
    // already float4, so no conversions are required either.
    const float4 input = inTexture.read(gid.xy, gid.z);
    const float4 rectified = fmax(input, 0.0);
    outTexture.write(rectified, gid.xy, gid.z);
}
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
/// Parameters of the PReLU op, resolved from the op description and scope.
class PreluParam<P: PrecisionType>: OpParam {
    typealias ParamPrecisionType = P

    /// Value of the op's `mode` attribute.
    let mode: String
    /// Alpha tensor.
    let alpha: Tensor<P>
    /// Input texture.
    let input: Texture<P>
    /// Output texture.
    var output: Texture<P>

    /// Looks up the inputs, output and `mode` attribute.
    ///
    /// - Throws: Whatever error the lookups throw; it is propagated unchanged.
    required init(opDesc: OpDesc, inScope: Scope) throws {
        let opInputs = opDesc.inputs
        input = try PreluParam.inputX(inputs: opInputs, from: inScope)
        output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
        alpha = try PreluParam.inputAlpha(inputs: opInputs, from: inScope)
        mode = try PreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
    }
}
/// PReLU operator: runs `PreluKernel` with a `PreluParam`.
class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable {
    typealias OpType = PreluOp<P>

    /// Currently does nothing: the output-dim assignment below is commented out.
    func inferShape() {
        // para.output.dim = para.input.dim
    }

    /// Encodes the kernel for this op into `buffer`.
    ///
    /// - Parameters:
    ///   - device: not used by this implementation.
    ///   - buffer: command buffer handed to `kernel.compute`.
    /// - Throws: any error thrown by `kernel.compute`, propagated unchanged.
    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
        try kernel.compute(commandBuffer: buffer, param: para)
    }

    /// Prints debug descriptions of the input and output textures.
    func delogOutput() {
        // Headers name this op ("prelu") so the log output is not mistaken
        // for another operator's.
        print("prelu delog")
        let _: P? = para.input.metalTexture.logDesc(header: "prelu input: ", stridable: false)
        let _: P? = para.output.metalTexture.logDesc(header: "prelu output: ", stridable: false)
    }
}
......@@ -19,27 +19,49 @@ class PriorBoxParam<P: PrecisionType>: OpParam {
/// Builds the prior-box parameters from an op description.
///
/// Inputs and outputs are resolved from `opDesc.inputs` / `opDesc.outputs`
/// within `inScope`; the remaining fields are read from `opDesc.attrs`.
///
/// - Throws: rethrows whatever error the first failing lookup throws.
required init(opDesc: OpDesc, inScope: Scope) throws {
    do {
        input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope)
        // NOTE(review): `output` and `variances` are each assigned twice in this
        // listing; it appears to interleave the lines removed and added by the
        // diff hunk. Confirm against the committed file before relying on it.
        output = try PriorBoxParam.getFirstTensor(key: "Boxes", map: opDesc.outputs, from: inScope)
        variances = try PriorBoxParam.getFirstTensor(key: "Variances", map: opDesc.outputs, from: inScope)
        output = try PriorBoxParam.outputBoxes(outputs: opDesc.outputs, from: inScope)
        inputImage = try PriorBoxParam.inputImage(inputs: opDesc.inputs, from: inScope)
        outputVariances = try PriorBoxParam.outputVariances(outputs: opDesc.outputs, from: inScope)
        minSizes = try PriorBoxParam.getAttr(key: "min_sizes", attrs: opDesc.attrs)
        maxSizes = try PriorBoxParam.getAttr(key: "max_sizes", attrs: opDesc.attrs)
        aspectRatios = try PriorBoxParam.getAttr(key: "aspect_ratios", attrs: opDesc.attrs)
        variances = try PriorBoxParam.getAttr(key: "variances", attrs: opDesc.attrs)
        flip = try PriorBoxParam.getAttr(key: "flip", attrs: opDesc.attrs)
        // The attribute key must be "clip", matching the `clip` property it fills.
        clip = try PriorBoxParam.getAttr(key: "clip", attrs: opDesc.attrs)
        stepW = try PriorBoxParam.getAttr(key: "step_w", attrs: opDesc.attrs)
        stepH = try PriorBoxParam.getAttr(key: "step_h", attrs: opDesc.attrs)
        offset = try PriorBoxParam.getAttr(key: "offset", attrs: opDesc.attrs)
    } catch let error {
        throw error
    }
}
let minSizes: [Float32]
let maxSizes: [Float32]
let aspectRatios: [Float32]
var newAspectRatios: [Float32]?
let variances: [Float32]
let flip: Bool
let clip: Bool
var stepW: Float32
var stepH: Float32
let offset: Float32
let input: Texture<P>
let inputImage: Texture<P>
var output: Texture<P>
let variances: Texture<P>
let outputVariances: Texture<P>
}
class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{
func inferShape() {
para.output.dim = para.input.dim
}
typealias OpType = PriorBoxOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
// try kernel.compute(commandBuffer: buffer, param: para)
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
......
......@@ -31,11 +31,11 @@ public struct Dim {
return dims.reduce(1) { $0 * $1 }
}
static func ==(left: Dim, right: Dim) -> Bool {
public static func ==(left: Dim, right: Dim) -> Bool {
    // Two Dim values are equal exactly when their `dims` compare equal.
    let sameDims = left.dims == right.dims
    return sameDims
}
subscript(index: Int) -> Int {
public subscript(index: Int) -> Int {
    // Read-only access to the extent stored at position `index` of `dims`.
    get {
        return dims[index]
    }
}
......
......@@ -41,15 +41,15 @@ extension InputTexture {
public class Texture<P: PrecisionType>: Tensorial {
var dim: Dim
var tensorDim: Dim
private(set) var originDim: Dim
private(set) public var originDim: Dim
private var textureDesc: MTLTextureDescriptor!
var metalTexture: MTLTexture!
public var metalTexture: MTLTexture!
var transpose: [Int] = [0, 1, 2, 3]
func initTexture(device: MTLDevice, transpose: [Int] = [0, 1, 2, 3]) {
let newDim = transpose.map { originDim[$0] }
let newLayout = transpose.map {layout.layoutWithDim[$0] }
let newLayout = transpose.map { layout.layoutWithDim[$0] }
layout = DataLayout.init(newLayout)
dim = Dim.init(inDim: newDim)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册