......@@ -18,17 +18,17 @@
FC27991321343A3A000B6BAD /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC27991221343A3A000B6BAD /* CPUCompute.mm */; };
FC3C800F2133F46600D1295E /* MobileNetSSD.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3C800E2133F46600D1295E /* MobileNetSSD.swift */; };
FC3C80112133F4AB00D1295E /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3C80102133F4AB00D1295E /* MobileNet.swift */; };
FC8CFEDF213521C10094D569 /* genet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEDD213521C10094D569 /* genet_model */; };
FC8CFEE0213521C10094D569 /* genet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEDE213521C10094D569 /* genet_params */; };
FC8CFEE2213524EA0094D569 /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC8CFEE1213524EA0094D569 /* Genet.swift */; };
FC8CFEE62135452C0094D569 /* genet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEE42135452B0094D569 /* genet_params */; };
FC8CFEE72135452C0094D569 /* genet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEE52135452B0094D569 /* genet_model */; };
FC8CFEF8213551D10094D569 /* params in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEF6213551D00094D569 /* params */; };
FC8CFEF9213551D10094D569 /* model in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEF7213551D00094D569 /* model */; };
FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC918190211DBC3500B6F354 /* paddle-mobile.png */; };
FC918193211DC70500B6F354 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC918192211DC70500B6F354 /* iphone.JPG */; };
FCA3A16121313E1F00084FE5 /* hand.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FCA3A16021313E1F00084FE5 /* hand.jpg */; };
FCBCCC522122EEDC00D94F7E /* ssd_hand_params in Resources */ = {isa = PBXBuildFile; fileRef = FCBCCC502122EEDC00D94F7E /* ssd_hand_params */; };
FCBCCC532122EEDC00D94F7E /* ssd_hand_model in Resources */ = {isa = PBXBuildFile; fileRef = FCBCCC512122EEDC00D94F7E /* ssd_hand_model */; };
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCD04E6320F3146B0007374F /* params in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6120F3146A0007374F /* params */; };
FCD04E6420F3146B0007374F /* model in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6220F3146A0007374F /* model */; };
FCDFD41B211D91C7005AB38B /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD41A211D91C7005AB38B /* synset.txt */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
......@@ -67,17 +67,17 @@
FC27991421343A46000B6BAD /* CPUCompute.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = "<group>"; };
FC3C800E2133F46600D1295E /* MobileNetSSD.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = "<group>"; };
FC3C80102133F4AB00D1295E /* MobileNet.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FC8CFEDD213521C10094D569 /* genet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_model; sourceTree = "<group>"; };
FC8CFEDE213521C10094D569 /* genet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_params; sourceTree = "<group>"; };
FC8CFEE1213524EA0094D569 /* Genet.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = "<group>"; };
FC8CFEE42135452B0094D569 /* genet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_params; sourceTree = "<group>"; };
FC8CFEE52135452B0094D569 /* genet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_model; sourceTree = "<group>"; };
FC8CFEF6213551D00094D569 /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
FC8CFEF7213551D00094D569 /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
FC918190211DBC3500B6F354 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; };
FC918192211DC70500B6F354 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; };
FCA3A16021313E1F00084FE5 /* hand.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = hand.jpg; sourceTree = "<group>"; };
FCBCCC502122EEDC00D94F7E /* ssd_hand_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = ssd_hand_params; sourceTree = "<group>"; };
FCBCCC512122EEDC00D94F7E /* ssd_hand_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = ssd_hand_model; sourceTree = "<group>"; };
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCD04E6120F3146A0007374F /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
FCD04E6220F3146A0007374F /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
FCDFD41A211D91C7005AB38B /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCEEE7D3210627A000444BEC /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
......@@ -165,9 +165,9 @@
FC0E2C2020EDC03B009C1FAC /* models */ = {
isa = PBXGroup;
children = (
FC8CFED821351F5D0094D569 /* genet */,
FC8CFEF5213551D00094D569 /* mobilenet */,
FC8CFEE32135452B0094D569 /* genet */,
FCBCCC4F2122EEDC00D94F7E /* mobilenet_ssd_hand */,
FCD04E6020F3146A0007374F /* mobilenet */,
name = models;
path = ../../models;
......@@ -188,31 +188,31 @@
path = Net;
sourceTree = "<group>";
FC8CFED821351F5D0094D569 /* genet */ = {
FC8CFEE32135452B0094D569 /* genet */ = {
isa = PBXGroup;
children = (
FC8CFEDD213521C10094D569 /* genet_model */,
FC8CFEDE213521C10094D569 /* genet_params */,
FC8CFEE42135452B0094D569 /* genet_params */,
FC8CFEE52135452B0094D569 /* genet_model */,
path = genet;
sourceTree = "<group>";
FCBCCC4F2122EEDC00D94F7E /* mobilenet_ssd_hand */ = {
FC8CFEF5213551D00094D569 /* mobilenet */ = {
isa = PBXGroup;
children = (
FCBCCC502122EEDC00D94F7E /* ssd_hand_params */,
FCBCCC512122EEDC00D94F7E /* ssd_hand_model */,
FC8CFEF6213551D00094D569 /* params */,
FC8CFEF7213551D00094D569 /* model */,
path = mobilenet_ssd_hand;
path = mobilenet;
sourceTree = "<group>";
FCD04E6020F3146A0007374F /* mobilenet */ = {
FCBCCC4F2122EEDC00D94F7E /* mobilenet_ssd_hand */ = {
isa = PBXGroup;
children = (
FCD04E6120F3146A0007374F /* params */,
FCD04E6220F3146A0007374F /* model */,
FCBCCC502122EEDC00D94F7E /* ssd_hand_params */,
FCBCCC512122EEDC00D94F7E /* ssd_hand_model */,
path = mobilenet;
path = mobilenet_ssd_hand;
sourceTree = "<group>";
/* End PBXGroup section */
......@@ -277,19 +277,19 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FCD04E6320F3146B0007374F /* params in Resources */,
FC8CFEF8213551D10094D569 /* params in Resources */,
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC8CFEE0213521C10094D569 /* genet_params in Resources */,
FC8CFEF9213551D10094D569 /* model in Resources */,
FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */,
FC8CFEE72135452C0094D569 /* genet_model in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
FCBCCC522122EEDC00D94F7E /* ssd_hand_params in Resources */,
FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */,
FC918193211DC70500B6F354 /* iphone.JPG in Resources */,
FCDFD41B211D91C7005AB38B /* synset.txt in Resources */,
FCD04E6420F3146B0007374F /* model in Resources */,
FC8CFEDF213521C10094D569 /* genet_model in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
FCA3A16121313E1F00084FE5 /* hand.jpg in Resources */,
FC8CFEE62135452C0094D569 /* genet_params in Resources */,
FCBCCC532122EEDC00D94F7E /* ssd_hand_model in Resources */,
runOnlyForDeploymentPostprocessing = 0;
......@@ -317,18 +317,3 @@ void MultiClassNMSCompute(NMSParam *param) {
......@@ -35,7 +35,7 @@ class Genet: Net {
var preprocessKernel: CusomKernel
let dim = [1, 128, 128, 3]
let dim = (n: 1, h: 128, w: 128, c: 3)
let modelPath: String
let paramPath: String
let modelDir: String
// MetalHelper.swift
// paddle-mobile-demo
// Created by liuRuiLong on 2018/7/25.
// Copyright © 2018年 orange. All rights reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import MetalKit
import Foundation
import paddle_mobile
import MetalPerformanceShaders
class MetalHelper {
let device: MTLDevice
let queue: MTLCommandQueue
let textureLoader: MTKTextureLoader
static let shared: MetalHelper = MetalHelper.init()
private init(){
device = MTLCreateSystemDefaultDevice()!
queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device)
let device: MTLDevice
let queue: MTLCommandQueue
let textureLoader: MTKTextureLoader
static let shared: MetalHelper = MetalHelper.init()
private init(){
device = MTLCreateSystemDefaultDevice()!
queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device)
static func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) {
guard let buffer = queue.makeCommandBuffer() else {
let scaleKernel = ScaleKernel.init(device: MetalHelper.shared.device, shape: CusomKernel.Shape.init(inWidth: size.width, inHeight: size.height, inChannel: 3))
do {
try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer)
} catch let error {
static func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) {
let tmpTextureDes = MTLTextureDescriptor.init()
tmpTextureDes.width = size.width
tmpTextureDes.height = size.height
tmpTextureDes.depth = 1
tmpTextureDes.usage = [.shaderRead, .shaderWrite]
tmpTextureDes.pixelFormat = .rgba32Float
tmpTextureDes.textureType = .type2D
tmpTextureDes.storageMode = .shared
tmpTextureDes.cpuCacheMode = .defaultCache
let dest = MetalHelper.shared.device.makeTexture(descriptor: tmpTextureDes)
let scale = MPSImageLanczosScale.init(device: MetalHelper.shared.device)
let buffer = queue.makeCommandBuffer()
scale.encode(commandBuffer: buffer!, sourceTexture: input, destinationTexture: dest!)
buffer?.addCompletedHandler({ (buffer) in
buffer.addCompletedHandler { (buffer) in
......@@ -58,7 +58,7 @@ class MobileNet: Net{
var preprocessKernel: CusomKernel
let dim = [1, 224, 224, 3]
let dim = (n: 1, h: 224, w: 224, c: 3)
let modelPath: String
let paramPath: String
let modelDir: String
......@@ -73,7 +73,7 @@ class MobileNet_ssd_hand: Net{
var preprocessKernel: CusomKernel
let dim = [1, 300, 300, 3]
let dim: (n: Int, h: Int, w: Int, c: Int) = (n: 1, h: 300, w: 300, c: 3)
let modelPath: String
let paramPath: String
let modelDir: String
......@@ -20,12 +20,17 @@ import Foundation
import paddle_mobile
import MetalPerformanceShaders
/// Custom kernel that runs the Metal compute function named "scale".
/// All texture allocation and pipeline setup is delegated to `CusomKernel`.
class ScaleKernel: CusomKernel {
/// - Parameters:
///   - device: Metal device forwarded unchanged to `CusomKernel.init`.
///   - shape: Forwarded as `outputDim`, i.e. the shape of the kernel's output.
init(device: MTLDevice, shape: Shape) {
// `usePaddleMobileLib: false` — presumably the "scale" function is looked up
// outside the paddle_mobile framework's shader library; confirm in `Kernel`.
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
protocol Net {
var program: Program? { get set }
var executor: Executor<Float32>? { get set }
var except: Int { get }
var dim: [Int] { get }
var dim: (n: Int, h: Int, w: Int, c: Int) { get }
var modelPath: String { get }
var paramPath: String { get }
var modelDir: String { get }
......@@ -56,7 +61,7 @@ extension Net {
guard let inExecutor = executor else {
fatalError(" 请先 load ")
try inExecutor.predict(input: inTexture, dim: dim, completionHandle: { (result) in
try inExecutor.predict(input: inTexture, dim: [dim.n, dim.h, dim.w, dim.c], completionHandle: { (result) in
var resultArr:[Float32] = []
resultArr = self.fetchResult(paddleMobileRes: result)
......@@ -73,7 +78,7 @@ extension Net {
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in
MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (dim.w, dim.h)) { (resTexture) in
// PreProcessKernel.metal
// paddle-mobile-demo
// Created by liuRuiLong on 2018/7/20.
// Copyright © 2018年 orange. All rights reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
......@@ -68,17 +74,33 @@ kernel void mobilenet_ssd_preprocess_half(
kernel void genet_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
kernel void genet_preprocess(texture2d<float, access::read> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]])
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
// Resamples `inTexture` into `outTexture` using nearest-neighbour lookup.
//
// One thread handles one output pixel (`gid`). The output pixel is mapped back
// to input pixel coordinates by multiplying with the input/output size ratio.
// Threads outside the output texture return without writing.
kernel void scale(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) return;

    // get_width()/get_height() return unsigned integers. Dividing them directly
    // truncates the ratio before it is stored in a float: a 1000 -> 224 scale
    // would use stride 4 (cropping the image) and any upscale would use stride 0
    // (every output pixel reading input pixel (0, 0)). Convert to float first.
    const float w_stride = float(inTexture.get_width()) / float(outTexture.get_width());
    const float h_stride = float(inTexture.get_height()) / float(outTexture.get_height());

    // Pixel (non-normalized) coordinates, nearest filtering, zero outside the texture.
    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
    float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
    outTexture.write(input, gid);
}
......@@ -20,6 +20,7 @@ import MetalPerformanceShaders
let threadSupport = [1]
let modelHelperMap: [SupportModel : Net] = [.mobilenet_ssd : MobileNet_ssd_hand.init(), .genet : Genet.init()]
//, .genet : Genet.init()
//let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()]
enum SupportModel: String{
......@@ -28,11 +29,10 @@ enum SupportModel: String{
case genet = "enet"
static func supportedModels() -> [SupportModel] {
return [.mobilenet_ssd, .genet]
return [.mobilenet_ssd ,.genet]
class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView!
......@@ -52,6 +52,7 @@
FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; };
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */; };
FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA3A1642132A5EB00084FE5 /* Common.metal */; };
FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */; };
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
......@@ -139,6 +140,7 @@
FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = "<group>"; };
FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReshapeKernel.metal; sourceTree = "<group>"; };
FCA3A1642132A5EB00084FE5 /* Common.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Common.metal; sourceTree = "<group>"; };
FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvTransposeKernel.metal; sourceTree = "<group>"; };
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
......@@ -369,6 +371,7 @@
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */,
FCA3A1622132A4AC00084FE5 /* ReshapeKernel.metal */,
FCA3A1642132A5EB00084FE5 /* Common.metal */,
FCA67B1621364EF000BD58AA /* ConvTransposeKernel.metal */,
path = metal;
sourceTree = "<group>";
......@@ -536,6 +539,7 @@
FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */,
FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */,
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */,
FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */,
FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */,
FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */,
......@@ -33,7 +33,7 @@
buildConfiguration = "Debug"
buildConfiguration = "Release"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
......@@ -210,7 +210,7 @@ extension MTLDevice {
extension MTLComputeCommandEncoder {
func dispatch(computePipline: MTLComputePipelineState, outTexture: MTLTexture) {
public func dispatch(computePipline: MTLComputePipelineState, outTexture: MTLTexture) {
let slices = (outTexture.arrayLength * 4 + 3)/4
let width = computePipline.threadExecutionWidth
......@@ -62,7 +62,7 @@ public class Executor<P: PrecisionType> {
queue = inQueue
for block in inProgram.programDesc.blocks {
for i in 0..<block.ops.count {
for i in 0..<4 {
let op = block.ops[i]
do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
......@@ -110,13 +110,12 @@ public class Executor<P: PrecisionType> {
buffer.addCompletedHandler { (commandbuffer) in
// return;
// let inputArr = resInput.floatArray(res: { (p:P) -> P in
// return p
// })
// writeToLibrary(fileName: "input_hand", array: inputArr)
// writeToLibrary(fileName: "genet_input_hand", array: inputArr)
// print("write to library done")
// return
// print(inputArr)
......@@ -125,10 +124,13 @@ public class Executor<P: PrecisionType> {
// print(stridableInput)
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
// for op in self.ops {
// op.delogOutput()
// }
// return
for i in 0..<self.ops.count {
let op = self.ops[i]
print(" 第 \(i) 个 op: ")
let afterDate = Date.init()
......@@ -97,6 +97,7 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
/// Debug helper: prints this op's `type`, then the contents of the output
/// texture converted with `toTensor(dim:)` and flattened by `strideArray()`.
func delogOutput() {
print(" \(type) output: ")
// The first four entries of `tensorDim` are passed as n, c, h, w —
// assumes the output tensor dim is 4-D in NCHW order; TODO confirm.
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
......@@ -57,7 +57,7 @@ open class CusomKernel: Kernel {
channel = inChannel
let outputTexture: MTLTexture
public let outputTexture: MTLTexture
public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) {
let textureDesc = MTLTextureDescriptor.init()
textureDesc.textureType = .type2D
......@@ -72,7 +72,7 @@ open class CusomKernel: Kernel {
super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib)
func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
public func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
......@@ -356,78 +356,6 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access
outTexture.write(output, gid.xy, gid.z);
struct MetalConvTransposeParam{
ushort kernelW;
ushort kernelH;
ushort strideX;
ushort strideY;
ushort paddingX;
ushort paddingY;
ushort dilationX;
ushort dilationY;
kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
int input_array_size = inTexture.get_array_size();
uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output;
for (int w = 0; w < param.kernelW; ++w) {
int input_x = (gid.x - w * param.dilationX + param.paddingX) / param.strideX;
if (input_x < 0 || input_x >= int(inTexture.get_width())) {
for (int h = 0; h < param.kernelH; ++h) {
int input_y = (gid.y - h * param.dilationY + param.paddingY) / param.strideY;
if (input_y < 0 || input_y >= int(inTexture.get_height())) {
uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();
for (int slice = 0; slice < input_array_size; ++slice) {
float4 input;
float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];
input = inTexture.sample(sample, float2(input_x, input_x), slice);
output.x += dot(input, kernel_slice);
output.x += dot(input, kernel_slice1);
output.x += dot(input, kernel_slice2);
output.x += dot(input, kernel_slice3);
outTexture.write(output, gid.xy, gid.z);
// conv
#pragma mark -- conv
kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
// Parameters of the transposed convolution, bound at buffer(0).
struct MetalConvTransposeParam{
    ushort kernelW;
    ushort kernelH;

    ushort strideX;
    ushort strideY;

    ushort paddingX;
    ushort paddingY;

    ushort dilationX;
    ushort dilationY;
};

// Transposed convolution over a texture array.
//
// One thread produces one output texel `gid.xy` in output slice `gid.z`
// (four output channels packed in a float4). For every kernel tap (w, h) the
// contributing input position is
//     input = (out + padding - tap * dilation) / stride
// and the tap contributes only when that quotient is a non-negative integer
// inside the input texture.
//
// `weights` is indexed as
//     [gid.z * 4 + channel][w * kernelH + h][input slice]
// i.e. four consecutive blocks of `kernel_one_output_slice` float4 values per
// output slice, one block per packed output channel.
kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                           texture2d_array<float, access::write> outTexture [[texture(1)]],
                           constant MetalConvTransposeParam &param [[buffer(0)]],
                           const device float4 *weights [[buffer(1)]],
                           uint3 gid [[thread_position_in_grid]]){
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    int input_array_size = inTexture.get_array_size();
    uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
    uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    // Accumulator must start at zero; it was previously left uninitialised.
    float4 output = float4(0.0);

    const int stride_x = int(param.strideX);
    const int stride_y = int(param.strideY);

    for (int w = 0; w < param.kernelW; ++w) {
        // Do the arithmetic in signed ints: `gid.x` is unsigned, so a negative
        // offset would otherwise wrap to a huge value and defeat the range check.
        const int x_offset = int(gid.x) - w * int(param.dilationX) + int(param.paddingX);
        // Only offsets that are non-negative multiples of the stride map onto
        // a real input column; other taps do not contribute to this output.
        if (x_offset < 0 || x_offset % stride_x != 0) {
            continue;
        }
        const int input_x = x_offset / stride_x;
        if (input_x >= int(inTexture.get_width())) {
            continue;
        }

        for (int h = 0; h < param.kernelH; ++h) {
            const int y_offset = int(gid.y) - h * int(param.dilationY) + int(param.paddingY);
            if (y_offset < 0 || y_offset % stride_y != 0) {
                continue;
            }
            const int input_y = y_offset / stride_y;
            if (input_y >= int(inTexture.get_height())) {
                continue;
            }

            uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();

            for (int slice = 0; slice < input_array_size; ++slice) {
                float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
                float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
                float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
                float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];

                // Sample at (x, y); the y coordinate was previously input_x.
                float4 input = inTexture.sample(sample, float2(input_x, input_y), slice);

                // One dot product per packed output channel; previously all
                // four were accumulated into output.x.
                output.x += dot(input, kernel_slice);
                output.y += dot(input, kernel_slice1);
                output.z += dot(input, kernel_slice2);
                output.w += dot(input, kernel_slice3);
            }
        }
    }

    outTexture.write(output, gid.xy, gid.z);
}
......@@ -59,12 +59,16 @@ class PoolOp<P: PrecisionType>: Operator<PoolKernel<P>, PoolParam<P>>, Runable,
func delogOutput() {
print("pool2d delog")
let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true)
let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true)
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
// print("pool2d delog")
// let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true)
// print(para.ksize)
// print(para.stride)
// print(para.padding)
// print(para.poolType)
// let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true)
......@@ -50,8 +50,8 @@ class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runabl
func delogOutput() {
print("softmax delog")
let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
// print("softmax delog")
// let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
// let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
......@@ -43,6 +43,12 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable,
throw error
/// Debug helper: prints this op's `type`, then the contents of the output
/// texture converted with `toTensor(dim:)` and flattened by `strideArray()`.
func delogOutput() {
print(" \(type) output: ")
// The first four entries of `tensorDim` are passed as n, c, h, w —
// assumes the output tensor dim is 4-D in NCHW order; TODO confirm.
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
