提交 b0a47b5b 编写于 作者: L liuruilong

update

上级 2e51fd63
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="4MS-jc-i6A">
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/>
</device>
......@@ -303,7 +303,7 @@
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="hGb-Pb-icS" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="-514" y="-3"/>
<point key="canvasLocation" x="-721" y="-427"/>
</scene>
</scenes>
<resources>
......
......@@ -20,12 +20,6 @@ class MultiPredictViewController: UIViewController {
let queue2 = MetalHelper.shared.device.makeCommandQueue()
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
}
@IBAction func predictAct(_ sender: Any) {
......
......@@ -169,6 +169,8 @@ public class VideoCapture: NSObject {
}
}
@available(iOS 10.0, *)
extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// Because lowering the capture device's FPS looks ugly in the preview,
......@@ -191,6 +193,7 @@ extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {
}
}
@available(iOS 10.0, *)
extension VideoCapture: AVCapturePhotoCaptureDelegate {
public func photoOutput(_ captureOutput: AVCapturePhotoOutput,
didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?,
......
......@@ -50,7 +50,7 @@ class ViewController: UIViewController {
@IBOutlet weak var modelPickerView: UIPickerView!
@IBOutlet weak var threadPickerView: UIPickerView!
@IBOutlet weak var videoView: UIView!
var videoCapture: VideoCapture!
// var videoCapture: VideoCapture!
var selectImage: UIImage?
var inputPointer: UnsafeMutablePointer<Float32>?
......@@ -106,6 +106,12 @@ class ViewController: UIViewController {
return
}
// for _ in 0..<10{
// runner.predict(texture: inTexture) { (success, resultHolder) in
// resultHolder?.releasePointer()
// }
// }
let startDate = Date.init()
for i in 0..<max {
runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
......@@ -279,17 +285,18 @@ extension ViewController: VideoCaptureDelegate{
}
func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) {
// if !bool1 {
// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) {
self.predictTexture(texture: texture!)
// }
// bool1 = true
// }
}
// @available(iOS 10.0, *)
// func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) {
//// if !bool1 {
//// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) {
// self.predictTexture(texture: texture!)
//// }
//
//
//// bool1 = true
//// }
//
// }
}
......
......@@ -70,7 +70,7 @@
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */; };
FC4FD9792140E4980073E130 /* PaddleMobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobile.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; };
FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97D2140F2C30073E130 /* libstdc++.tbd */; };
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
......@@ -122,6 +122,12 @@
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; };
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; };
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; };
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */; };
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */; };
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */; };
FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */; };
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */; };
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */; };
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B6214F869000B520C3 /* Net.swift */; };
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
......@@ -199,7 +205,7 @@
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = "<group>"; };
FC4FD9772140E4980073E130 /* PaddleMobile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobile.h; sourceTree = "<group>"; };
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileCPU.h; sourceTree = "<group>"; };
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = "<group>"; };
FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
......@@ -251,6 +257,12 @@
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = "<group>"; };
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = "<group>"; };
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = "<group>"; };
FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluOp.swift; sourceTree = "<group>"; };
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddAddPreluKernel.swift; sourceTree = "<group>"; };
FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluOp.swift; sourceTree = "<group>"; };
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddPreluKernel.swift; sourceTree = "<group>"; };
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.inc.metal; sourceTree = "<group>"; };
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ElementwiseAddPreluKernel.metal; sourceTree = "<group>"; };
FCE9D7B6214F869000B520C3 /* Net.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = "<group>"; };
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
......@@ -390,6 +402,8 @@
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */,
FCEB684B212F093800D2448E /* PreluOp.swift */,
FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */,
FCE3A1A82153DE5100C37CDE /* ConvAddAddPreluOp.swift */,
FCE3A1AC2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift */,
);
path = Operators;
sourceTree = "<group>";
......@@ -439,6 +453,8 @@
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */,
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */,
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */,
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */,
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */,
);
path = Kernels;
sourceTree = "<group>";
......@@ -447,7 +463,7 @@
isa = PBXGroup;
children = (
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */,
FC4FD9772140E4980073E130 /* PaddleMobile.h */,
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */,
);
path = CPU;
sourceTree = "<group>";
......@@ -506,6 +522,8 @@
FC803BC6214CBA820094B8E5 /* Macro.metal */,
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */,
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */,
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */,
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */,
);
path = metal;
sourceTree = "<group>";
......@@ -517,7 +535,7 @@
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
FC4FD9792140E4980073E130 /* PaddleMobile.h in Headers */,
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */,
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */,
FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */,
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */,
......@@ -632,8 +650,10 @@
FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */,
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */,
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
......@@ -668,6 +688,7 @@
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */,
FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
FC9D038420E23B01000F735A /* Texture.swift in Sources */,
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */,
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
......@@ -700,6 +721,7 @@
4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */,
FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */,
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */,
FCE3A1AF2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift in Sources */,
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
......@@ -718,7 +740,9 @@
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */,
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */,
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */,
4AA1EA90214664CD00D0F791 /* Split.metal in Sources */,
......
......@@ -33,7 +33,7 @@
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
buildConfiguration = "Release"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
......
......@@ -90,32 +90,32 @@ public class MobileNet_ssd_AR: Net{
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
program.scope[output] = originTexture
if i == 99 {
opDesc.attrs["axis"] = 0
} else {
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
}
}
for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
}
for i in [60, 101, 90, 97, 70, 80] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
......@@ -124,7 +124,7 @@ public class MobileNet_ssd_AR: Net{
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
}
for i in [102] {
let opDesc = program.programDesc.blocks[0].ops[i]
for output in opDesc.outputs["Out"]! {
......
......@@ -65,7 +65,9 @@ class OpCreator<P: PrecisionType> {
gSplit : SplitOp<P>.creat,
gShape : ShapeOp<P>.creat,
gFlatten : FlattenOp<P>.creat,
gConvAddPreluType : ConvAddPreluOp<P>.creat]
gConvAddPreluType : ConvAddPreluOp<P>.creat,
gConvAddAddPreluType : ConvAddAddPreluOp<P>.creat,
gElementwiseAddPreluType: ElementwiseAddPreluOp<P>.creat]
private init(){}
}
......@@ -157,6 +157,9 @@ let gSplit = "split"
let gShape = "shape"
let gFlatten = "flatten"
let gConvAddPreluType = "conv_add_prelu"
let gConvAddAddPreluType = "conv_add_add_prelu"
let gElementwiseAddPreluType = "elementwise_add_prelu"
let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]),
gBatchNormType : (inputs: ["X"], outputs: ["Y"]),
......@@ -183,6 +186,7 @@ let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Out
gSplit : (inputs: ["X"], outputs: ["Out"]),
gShape : (inputs: ["Input"], outputs: ["Out"]),
gFlatten : (inputs: ["X"], outputs: ["Out"]),
gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"])
gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"]),
gConvAddAddPreluType : (inputs: ["Input"], outputs: ["Out"]),
gElementwiseAddPreluType : (inputs: ["X"], outputs: ["Out"])
]
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs)
alpha = try ConvAddAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ConvAddAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
y = try ConvAddAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
}
}
let input: Texture<P>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let mode: String
let alpha: Tensor<P>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
}
class ConvAddAddPreluOp<P: PrecisionType>: Operator<ConvAddAddPreluKernel<P>, ConvAddAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddAddPreluOp<P>
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gConvAddAddPreluType
}
static func needCheck() -> [(Int, String)] {
return [(2, "Y"), (2, "X")]
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
}
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
inputX = try ElementwiseAddPreluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ElementwiseAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try ElementwiseAddPreluParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error {
throw error
}
do {
inputY = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch _ {
let tensorY: Tensor<P> = try ElementwiseAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision)
}
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
// if computePrecision == .Float32 {
// super.init(device: device, inFunctionName: "elementwise_add")
// } else if computePrecision == .Float16 {
// super.init(device: device, inFunctionName: "elementwise_add_half")
// } else {
// fatalError()
// }
// }
var offset = axis
if axis == -1 {
offset = inputX.tensorDim.cout() - inputY.tensorDim.cout()
}
for i in 0..<(inputY.tensorDim.cout()) {
assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i])
}
}
let mode: String
let alpha: Tensor<P>
var inputX: Texture<P>
var inputY: Texture<P>
var output: Texture<P>
var axis: Int
}
class ElementwiseAddPreluOp<P: PrecisionType>: Operator<ElementwiseAddPreluKernel<P>, ElementwiseAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gElementwiseAddType)
_ = beginNode
--> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gElementwiseAddPreluType
}
typealias OpType = ElementwiseAddPreluOp<P>
func inferShape() {
// para.output.dim = para.input.dim
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output)
let padToFourDim = para.output.padToFourDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half")
}
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float")
}
} else {
fatalError(" unsupport yet ")
}
} else {
fatalError()
}
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
// print(" function: \(functionName)")
// print("offset x: \(offsetX)")
// print("offset y: \(offsetY)")
let offsetZ = 0.0
let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
// print("metal param: ")
// print(inMetalParam)
metalParam = inMetalParam
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddAddPreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
metalParam = ElementwiseAddMetalParam.init()
let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
if param.axis == -1 {
metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout())
} else {
metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
}
metalParam.ylen = Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
// print("===> elementwise_add fast!!!")
metalParam.fast = 1
}
if computePrecision == .Float32 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_element_float")
} else {
super.init(device: device, inFunctionName: "elementwise_add_prelu_float")
}
} else if computePrecision == .Float16 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
} else {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
}
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddPreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.inputX.metalTexture, index: 0)
encoder.setTexture(param.inputY.metalTexture, index: 1)
encoder.setTexture(param.output.metalTexture, index: 2)
encoder.setBytes(&metalParam, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
......@@ -40,7 +40,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 output = biase[gid.z];
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
......@@ -57,7 +57,7 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = output + biase[gid.z];
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
......@@ -85,7 +85,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
......@@ -125,7 +125,7 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w);
}
}
output = output + biase[gid.z];
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
......@@ -153,7 +153,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
float4 input[5];
......@@ -183,7 +183,7 @@ kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w);
}
}
output = output + biase[gid.z];
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
......@@ -212,7 +212,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
float4 input[5];
......@@ -242,7 +242,7 @@ kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[text
output.w += dot(input[j], weight_w);
}
}
output = output + biase[gid.z];
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
......@@ -265,7 +265,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 output = biase[gid.z];
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
......@@ -283,7 +283,7 @@ kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inText
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = output + biase[gid.z];
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
......@@ -312,7 +312,7 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 output = biase[gid.z];
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
......@@ -329,8 +329,8 @@ kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z);
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
......@@ -354,7 +354,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
......@@ -384,8 +384,8 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
output.w += dot(float4(input[j]), float4(weight_w));
}
}
output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z);
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
......@@ -406,7 +406,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 output = biase[gid.z];
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
......@@ -419,13 +419,13 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]);
output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]);
output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]);
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z);
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
......@@ -453,7 +453,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
half4 input[5];
......@@ -480,11 +480,11 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
output.w += dot(input[j], weight_w);
}
}
output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z);
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
......@@ -512,7 +512,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
half4 input[5];
......@@ -542,8 +542,8 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
output.w += dot(input[j], weight_w);
}
}
output = output + float4(biase[gid.z]);
outTexture.write(half4(output), gid.xy, gid.z);
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
......
......@@ -49,7 +49,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) input;
for (uint i = 0; i < input_arr_size; ++i) {
......@@ -67,7 +67,7 @@ kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl
output.w += dot(input, weight_w);
}
output = output + float4(biase[gid.z]);
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
......@@ -126,7 +126,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
......@@ -166,7 +166,7 @@ kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sampl
output.w += dot(input[j], weight_w);
}
}
output = output + float4(biase[gid.z]);
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
......@@ -226,7 +226,7 @@ kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(biase[gid.z]);;
VECTOR(P, 4) output = biase[gid.z];;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[5];
......@@ -316,7 +316,7 @@ kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array<P, access::sampl
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(biase[gid.z]);
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
VECTOR(P, 4) input[5];
......@@ -399,7 +399,7 @@ kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, acc
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(biase[gid.z]);
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
texture2d_array<P, access::read> inputY [[texture(1)]],
texture2d_array<P, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(1)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
VECTOR(P, 4) rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
VECTOR(P, 4) output = rx + ry;
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(output, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
......@@ -69,6 +69,7 @@ class Node {
func to(depth: UInt) -> Node {
let beginNode = Node.init(inType: type)
beginNode.opDesc = opDesc
to(depth: depth - 1, withNode: beginNode)
return beginNode
}
......@@ -130,6 +131,7 @@ class Node {
for output in outputs {
let node = Node.init(inType: output.type)
node.opDesc = output.opDesc
withNode.outputs.append(node)
output.to(depth: depth - 1, withNode: node)
}
......@@ -182,10 +184,12 @@ extension Node: Equatable {
class ProgramOptimize<P: PrecisionType> {
// register fusion
let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self,
// ConvAddAddPreluOp<P>.self,
ConvAddPreluOp<P>.self,
ConvAddOp<P>.self,
ConvBNReluOp<P>.self,
DwConvBNReluOp<P>.self
DwConvBNReluOp<P>.self,
// ElementwiseAddPreluOp<P>.self
]
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
......@@ -256,6 +260,15 @@ class ProgramOptimize<P: PrecisionType> {
}
}
}
let paramInputToChecks = checkNode.opDesc?.paraInputs[toCheck.1] ?? []
for paramInputToCheck in paramInputToChecks {
if node.output[paramInputToCheck] == nil {
if relationshipMap[paramInputToCheck] == nil {
canFolder = false
}
}
}
}
}
......
......@@ -83,7 +83,7 @@ public class Executor<P: PrecisionType> {
for block in inProgram.programDesc.blocks {
//block.ops.count
for i in 0..<block.ops.count {
let op = block.ops[i]
let opDesc = block.ops[i]
do {
// print("in for i \(i): ")
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
......@@ -93,7 +93,7 @@ public class Executor<P: PrecisionType> {
//
// }
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope)
ops.append(op)
} catch let error {
throw error
......
......@@ -14,7 +14,7 @@
#pragma once
#import "PaddleMobile.h"
#import "PaddleMobileCPU.h"
#import "CPUCompute.h"
#import "PaddleMobileGPU.h"
#import <UIKit/UIKit.h>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册