提交 c3c7b07d 编写于 作者: L liuruilong

add unit test

上级 b8f3d025
......@@ -215,10 +215,12 @@
FC0E2DB420EDC03C009C1FAC /* conv2d_27.w_0 in Resources */ = {isa = PBXBuildFile; fileRef = FC0E2CEA20EDC03B009C1FAC /* conv2d_27.w_0 */; };
FC0E2DB520EDC03C009C1FAC /* conv2d_33.w_0 in Resources */ = {isa = PBXBuildFile; fileRef = FC0E2CEB20EDC03B009C1FAC /* conv2d_33.w_0 */; };
FC0E2DB620EDC03C009C1FAC /* depthwise_conv2d_7.w_0 in Resources */ = {isa = PBXBuildFile; fileRef = FC0E2CEC20EDC03B009C1FAC /* depthwise_conv2d_7.w_0 */; };
FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602C72108580600FACB58 /* MetalHelper.swift */; };
FCD04E6320F3146B0007374F /* params in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6120F3146A0007374F /* params */; };
FCD04E6420F3146B0007374F /* model in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6220F3146A0007374F /* model */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCEEE7D3210627A000444BEC /* banana.jpeg */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
......@@ -448,9 +450,11 @@
FC0E2CEA20EDC03B009C1FAC /* conv2d_27.w_0 */ = {isa = PBXFileReference; lastKnownFileType = file; path = conv2d_27.w_0; sourceTree = "<group>"; };
FC0E2CEB20EDC03B009C1FAC /* conv2d_33.w_0 */ = {isa = PBXFileReference; lastKnownFileType = file; path = conv2d_33.w_0; sourceTree = "<group>"; };
FC0E2CEC20EDC03B009C1FAC /* depthwise_conv2d_7.w_0 */ = {isa = PBXFileReference; lastKnownFileType = file; path = depthwise_conv2d_7.w_0; sourceTree = "<group>"; };
FC3602C72108580600FACB58 /* MetalHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCD04E6120F3146A0007374F /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
FCD04E6220F3146A0007374F /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCEEE7D3210627A000444BEC /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
......@@ -514,6 +518,7 @@
FC039B8820E11C560081E9F8 /* Assets.xcassets */,
FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */,
FC039B8D20E11C560081E9F8 /* Info.plist */,
FC3602C72108580600FACB58 /* MetalHelper.swift */,
path = "paddle-mobile-demo";
sourceTree = "<group>";
......@@ -521,6 +526,7 @@
FC0E2C1D20EDC030009C1FAC /* images */ = {
isa = PBXGroup;
children = (
FCEEE7D3210627A000444BEC /* banana.jpeg */,
FC0E2C1E20EDC030009C1FAC /* apple.jpg */,
name = images;
......@@ -900,6 +906,7 @@
FC0E2D1120EDC03B009C1FAC /* conv2d_10.w_0 in Resources */,
FC0E2D7120EDC03C009C1FAC /* conv2d_2.w_0 in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */,
FC0E2D5020EDC03C009C1FAC /* batch_norm_31.w_1 in Resources */,
FC0E2D2B20EDC03B009C1FAC /* batch_norm_34.w_1 in Resources */,
FC0E2D8F20EDC03C009C1FAC /* conv2d_20.w_0 in Resources */,
......@@ -1072,6 +1079,7 @@
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */,
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */,
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */,
runOnlyForDeploymentPostprocessing = 0;
// MetalHelper.swift
// paddle-mobile-demo
// Created by liuRuiLong on 2018/7/25.
// Copyright © 2018年 orange. All rights reserved.
import Metal
import paddle_mobile
import Foundation
/// Holds the system default Metal device and one command queue created from it.
/// Access goes through `MetalHelper.shared`; the initializer is private.
class MetalHelper {
    /// The device returned by `MTLCreateSystemDefaultDevice()`.
    let device: MTLDevice
    /// A command queue created from `device`.
    let queue: MTLCommandQueue
    /// The single shared instance.
    static let shared: MetalHelper = MetalHelper.init()

    /// Creates the device and the queue.
    /// Traps with a descriptive message when either one cannot be created,
    /// instead of crashing on an anonymous force-unwrap.
    private init() {
        guard let defaultDevice = MTLCreateSystemDefaultDevice() else {
            fatalError(" default Metal device is nil ")
        }
        guard let commandQueue = defaultDevice.makeCommandQueue() else {
            fatalError(" Metal command queue is nil ")
        }
        device = defaultDevice
        queue = commandQueue
    }
}
......@@ -19,10 +19,12 @@ kernel void preprocess(
gid.y >= outTexture.get_height()) {
// Subtract mean values, scale by 0.017, convert to BGR.
const auto means = float4(103.94f, 116.78f, 123.68f, 0.0f);
const float4 inColor = (float4(inTexture.read(gid)) * 255.0f - means) * 0.017f;
outTexture.write(float4(inColor.x, inColor.y, inColor.z, 0.0f), gid);
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (float4(float4(inTexture.read(gid))) * 255.0f - means) * 0.017f;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
......@@ -17,6 +17,8 @@ import MetalKit
import paddle_mobile
import MetalPerformanceShaders
let openTest: Bool = false
class PreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
......@@ -26,7 +28,6 @@ class PreProccess: CusomKernel {
class ViewController: UIViewController {
let device: MTLDevice! = MTLCreateSystemDefaultDevice()
var textureLoader: MTKTextureLoader!
// let queue: MTLCommandQueue
func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, complete: @escaping (MTLTexture) -> Void) {
......@@ -39,9 +40,9 @@ class ViewController: UIViewController {
tmpTextureDes.textureType = .type2D
tmpTextureDes.storageMode = .shared
tmpTextureDes.cpuCacheMode = .defaultCache
let dest = device.makeTexture(descriptor: tmpTextureDes)
let dest = MetalHelper.shared.device.makeTexture(descriptor: tmpTextureDes)
let scale = MPSImageLanczosScale.init(device: device)
let scale = MPSImageLanczosScale.init(device: MetalHelper.shared.device)
let buffer = queue.makeCommandBuffer()
scale.encode(commandBuffer: buffer!, sourceTexture: input, destinationTexture: dest!)
......@@ -51,12 +52,27 @@ class ViewController: UIViewController {
func unitTest() {
let unitTest = PaddleMobileUnitTest.init(inDevice: MetalHelper.shared.device, inQueue: MetalHelper.shared.queue)
override func viewDidLoad() {
let queue = device.makeCommandQueue()
textureLoader = MTKTextureLoader.init(device: device)
guard let appleImage = UIImage.init(named: "apple.jpg"), let cgImage = appleImage.cgImage else {
if openTest {
print(" - testing - ")
// return
let queue = MetalHelper.shared.queue
textureLoader = MTKTextureLoader.init(device: MetalHelper.shared.device)
guard let appleImage = UIImage.init(named: "banana.jpeg"), let cgImage = appleImage.cgImage else {
fatalError(" image nil !")
......@@ -65,19 +81,18 @@ class ViewController: UIViewController {
guard let inTexture = texture else {
fatalError(" texture is nil !")
scaleTexture(queue: queue!, input: inTexture) { (inputTexture) in
scaleTexture(queue: queue, input: inTexture) { (inputTexture) in
let loader = Loader<Float32>.init()
do {
let modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
let paraPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
let program = try loader.load(device: self.device, modelPath: modelPath, paraPath: paraPath)
let executor = try Executor<Float32>.init(inDevice: self.device, inQueue: queue!, inProgram: program)
let preprocessKernel = PreProccess.init(device: self.device)
let output = try executor.predict(input: inputTexture, expect: [1, 224, 224, 3], preProcessKernle: preprocessKernel)
// print(output)
let program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath)
let executor = try Executor<Float32>.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program)
let preprocessKernel = PreProccess.init(device: MetalHelper.shared.device)
try executor.predict(input: inputTexture, expect: [1, 224, 224, 3], completionHandle: { (result) in
print(result.resultArr.top(r: 5))
}, preProcessKernle: preprocessKernel)
} catch let error {
......@@ -36,6 +36,7 @@
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; };
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; };
FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1B186520ECF1C600678B91 /* ResizeKernel.swift */; };
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; };
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
......@@ -53,6 +54,7 @@
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6F20F31B720007374F /* ReshapeKernel.swift */; };
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; };
FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; };
FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; };
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
......@@ -92,6 +94,7 @@
FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; };
FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; };
FC1B186520ECF1C600678B91 /* ResizeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ResizeKernel.swift; sourceTree = "<group>"; };
FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; };
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
......@@ -109,6 +112,7 @@
FCD04E6F20F31B720007374F /* ReshapeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReshapeKernel.swift; sourceTree = "<group>"; };
FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; };
FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; };
FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; };
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; };
......@@ -182,7 +186,9 @@
FC039B9420E11C9A0081E9F8 /* Extensions.swift */,
FC039B9520E11C9A0081E9F8 /* Errors.swift */,
FC039B9620E11C9A0081E9F8 /* Types.swift */,
FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */,
FC60DB8820E9AAA500FF203F /* MetalExtension.swift */,
FCDC0FEA21099A1D00DC9EFB /* Tools.swift */,
path = Common;
sourceTree = "<group>";
......@@ -374,6 +380,7 @@
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
......@@ -396,6 +403,7 @@
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */,
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */,
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
......@@ -24,6 +24,7 @@ public func ?!<T>(option: T?, excuteOrError: @autoclosure () -> String) -> T{
if let inOpt = option {
return inOpt
......@@ -90,7 +91,7 @@ extension Array where Element: Comparable{
/// - Parameter r: how many of the largest elements to return
/// - Returns: [(original position, element)] ordered from largest to smallest
/// - Precondition: `r` must lie in `0...count`
public func top(r: Int) -> [(Int, Element)] {
    // Reject negative values here; otherwise they would pass the upper-bound
    // check and trap later with an unrelated range error.
    precondition(r >= 0 && r <= count, "r must be in 0...count")
    // Pair every element with its original position so the position survives the sort.
    let ranked = zip(indices, self).sorted { $0.1 > $1.1 }
    return Array(ranked.prefix(r))
}
......@@ -61,6 +61,47 @@ extension MTLDevice {
/// Creates a shared-storage buffer and copies `value` into it element by element.
///
/// - Parameter value: the elements to store in the buffer.
/// - Returns: a buffer of `value.count` elements of `P`.
func makeBuffer<P>(value: [P]) -> MTLBuffer {
    // `stride` is the distance between consecutive array elements, so it is the
    // correct per-element size for a contiguous copy.
    let byteCount = value.count * MemoryLayout<P>.stride
    guard let buffer = makeBuffer(length: byteCount, options: MTLResourceOptions.storageModeShared) else {
        fatalError(" make buffer failed, length: \(byteCount) ")
    }
    // `capacity` counts instances of `P`, not bytes.
    let contents = buffer.contents().bindMemory(to: P.self, capacity: value.count)
    for (index, element) in value.enumerated() {
        // The buffer memory is freshly allocated, so initialize rather than assign.
        (contents + index).initialize(to: element)
    }
    return buffer
}
/// Creates a shared `rgba32Float` 2D-array texture and, for a single-slice texture,
/// uploads `value` into it.
///
/// - Parameters:
///   - value: source components; uploaded only when `arrayLength == 1` and
///     `value.count >= 4`, otherwise the texture is returned as allocated.
///   - textureWidth: texture width in texels.
///   - textureHeight: texture height in texels.
///   - arrayLength: number of slices in the array texture.
/// - Returns: the created texture.
func makeFloatTexture<P>(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture {
    let textureDesc = MTLTextureDescriptor.init()
    textureDesc.width = textureWidth
    textureDesc.height = textureHeight
    textureDesc.depth = 1
    textureDesc.usage = [.shaderRead, .shaderWrite]
    textureDesc.pixelFormat = .rgba32Float
    textureDesc.textureType = .type2DArray
    textureDesc.storageMode = .shared
    textureDesc.cpuCacheMode = .defaultCache
    textureDesc.arrayLength = arrayLength
    guard let texture = makeTexture(descriptor: textureDesc) else {
        fatalError(" make texture failed ")
    }

    guard arrayLength == 1 && value.count >= 4 else {
        return texture
    }

    // The upload reads width * height * depth * 4 components from `value`;
    // trap instead of reading past the end of the array.
    // NOTE(review): assumes each element of `value` is one 4-byte component — confirm for non-Float32 `P`.
    let componentCount = texture.width * texture.height * texture.depth * 4
    precondition(value.count >= componentCount, "value holds fewer components than the texture region")

    let bytesPerRow = texture.width * texture.depth * 4 * MemoryLayout<P>.size
    let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth))
    // Read straight from the array storage: no staging allocation, so nothing to free.
    value.withUnsafeBytes { rawBuffer in
        guard let baseAddress = rawBuffer.baseAddress else {
            return
        }
        texture.replace(region: region, mipmapLevel: 0, withBytes: baseAddress, bytesPerRow: bytesPerRow)
    }
    return texture
}
extension MTLComputeCommandEncoder {
......@@ -79,63 +120,117 @@ extension MTLComputeCommandEncoder {
let groupDepth = slices
let groups = MTLSize.init(width: groupWidth, height: groupHeight, depth: groupDepth)
// print("groups: \(groups) ")
print("groups: \(groups) ")
print("threads per group: \(threadsPerGroup)")
dispatchThreadgroups(groups, threadsPerThreadgroup: threadsPerGroup)
public extension MTLTexture {
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
print("texture: \(self)")
/// Reads the texture through `floatArray` and pairs each returned value with its index.
///
/// - Parameter stridable: when `true` and more than 100 values were read, only every
///   `count / 100`-th value is kept; otherwise every value is kept.
/// - Returns: `(index, value)` pairs in ascending index order.
func stridableFloatArray<P>(stridable: Bool = true) -> [(index: Int, value: P)] {
    let values: [P] = floatArray { (element: P) -> P in
        return element
    }
    let shouldSample = values.count > 100 && stridable
    let positions: [Int] = shouldSample
        ? Array(stride(from: 0, to: values.count, by: values.count / 100))
        : Array(values.indices)
    return positions.map { (index: $0, value: values[$0]) }
}
func floatArray<P, T>(res: (P) -> T) -> [T] {
var fArr: [T] = []
if textureType == .type2DArray {
for i in 0..<arrayLength{
var str: String = "slice: \(i): \n"
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<P>.size, alignment: MemoryLayout<P>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<P>.size
let bytesPerImage = width * height * depth * 4 * MemoryLayout<P>.size
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
let p = bytes.assumingMemoryBound(to: T.self)
str += "2d array count : \(width * height * depth * 4) \n"
if stridable && width * height * depth * 4 > 100 {
for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){
str += " index \(j): \(p[j])"
} else {
for j in 0..<width * height * depth * 4 {
str += " index \(j): \(p[j])"
let p = bytes.assumingMemoryBound(to: P.self)
for j in 0..<width * height * depth * 4 {
} else if textureType == .type2D {
var str: String = "texture 2D: "
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<P>.size, alignment: MemoryLayout<P>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<P>.size
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
let p = bytes.assumingMemoryBound(to: T.self)
str += "2d count : \(width * width * 4) \n"
if stridable {
for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){
str += " \(p[j])"
} else {
for j in 0..<width * height * 4 {
str += " \(p[j])"
let p = bytes.assumingMemoryBound(to: P.self)
for j in 0..<width * height * 4 {
return fArr
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
print("texture: \(self)")
let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable)
// if textureType == .type2DArray {
// for i in 0..<arrayLength{
// var str: String = "slice: \(i): \n"
// let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
// let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
// let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
// getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
// let p = bytes.assumingMemoryBound(to: T.self)
// str += "2d array count : \(width * height * depth * 4) \n"
// if stridable && width * height * depth * 4 > 100 {
// for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){
// str += " index \(j): \(p[j])"
// }
// } else {
// for j in 0..<width * height * depth * 4 {
// str += " index \(j): \(p[j])"
// }
// }
// bytes.deallocate()
// print(str)
// }
// } else if textureType == .type2D {
// var str: String = "texture 2D: "
// let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
// let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
// getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
// let p = bytes.assumingMemoryBound(to: T.self)
// str += "2d count : \(width * width * 4) \n"
// if stridable {
// for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){
// str += "index \(j): \(p[j]) "
// }
// } else {
// for j in 0..<width * height * 4 {
// str += "index \(j): \(p[j]) "
// }
// }
// print(str)
// bytes.deallocate()
// }
return nil
......@@ -158,7 +253,24 @@ public extension MTLBuffer {
return nil
/// Creates an `rgba32Float` 2D-array texture descriptor with shared storage and asks
/// this buffer for a texture at offset 0.
///
/// - Parameters:
///   - textureWidth: texture width in texels.
///   - textureHeight: texture height in texels.
///   - arrayLength: number of slices set on the descriptor.
/// - Returns: the texture; traps if the buffer returns `nil`.
func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture {
    let descriptor = MTLTextureDescriptor()
    descriptor.textureType = .type2DArray
    descriptor.pixelFormat = .rgba32Float
    descriptor.usage = [.shaderRead, .shaderWrite]
    descriptor.storageMode = .shared
    descriptor.cpuCacheMode = .defaultCache
    descriptor.width = textureWidth
    descriptor.height = textureHeight
    descriptor.depth = 1
    descriptor.arrayLength = arrayLength

    // NOTE(review): 4 * 4 is presumably 4 components x 4 bytes for rgba32Float — confirm.
    let rowBytes = textureWidth * 4 * 4
    return makeTexture(descriptor: descriptor, offset: 0, bytesPerRow: rowBytes)!
}
// PaddleMobileUnitTest.swift
// paddle-mobile-demo
// Created by liuRuiLong on 2018/7/25.
// Copyright © 2018年 orange. All rights reserved.
import Metal
import Foundation
public class PaddleMobileUnitTest {
let device: MTLDevice
let queue: MTLCommandQueue
public init(inDevice: MTLDevice, inQueue: MTLCommandQueue) {
device = inDevice
queue = inQueue
public func testConvAddBnRelu() {
let buffer = queue.makeCommandBuffer() ?! " buffer is nil "
let input: [Float32] = [
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
1.0, 2.0, 3.0, 4.0,
let filter: [Float32] = [
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0,
let biase: [Float32] = [1.0, 1.0, 1.0, 100.0]
let newScalue: [Float32] = [1.0, 1.0, 1.0, 1.0]
let newBiase: [Float32] = [1.0, 1.0, 1.0, 1.0]
let inputeTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 3, arrayLength: 1)
let filterBuffer = device.makeBuffer(value: filter)
// biase
let biaseBuffer = device.makeBuffer(value: biase)
// new scale
let newScalueBuffer = device.makeBuffer(value: newScalue)
// new biase
let newBiaseBuffer = device.makeBuffer(value: newBiase)
let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 2, textureHeight: 2, arrayLength: 1)
let filterSize: (width: Int, height: Int, channel: Int) = (3, 3, 4)
let paddings: (Int, Int) = (1, 1)
let stride: (Int, Int) = (2, 2)
let offsetX = filterSize.width/2 - paddings.0
let offsetY = filterSize.height/2 - paddings.1
let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0))
let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize)
let convAddBnReluKernel = ConvAddBatchNormReluKernel<Float32>.init(device: device, testParam: param)
convAddBnReluKernel.test(commandBuffer: buffer, param: param)
buffer.addCompletedHandler { (buffer) in
let _: Float32? = inputeTexture.logDesc(header: "input texture", stridable: false)
let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
// let inputTexture = device.makeFloatTexture(value: <#T##[P]#>, textureWidth: <#T##Int#>, textureHeight: <#T##Int#>, arrayLength: <#T##Int#>)
// let param = ConvAddBatchNormReluTestParam.init(inInputTexture: <#T##MTLTexture#>, inOutputTexture: <#T##MTLTexture#>, inMetalParam: <#T##MetalConvParam#>, inFilterBuffer: <#T##MTLBuffer#>, inBiaseBuffer: <#T##MTLBuffer#>, inNewScaleBuffer: <#T##MTLBuffer#>, inNewBiaseBuffer: <#T##MTLBuffer#>, inFilterSize: <#T##(width: Int, height: Int, channel: Int)#>)
// ConvAddBatchNormReluKernel.init(device: <#T##MTLDevice#>, testParam: <#T##ConvAddBatchNormReluTestParam#>)
// Tools.swift
// paddle-mobile
// Created by liuRuiLong on 2018/7/26.
// Copyright © 2018年 orange. All rights reserved.
import Foundation
/// Writes the raw bytes of `array` to a file named `fileName` in the user's Library directory.
/// The file is created with empty contents first, so an existing file at that path is replaced.
///
/// - Parameters:
///   - fileName: name of the file, appended to the Library directory path.
///   - array: the values whose in-memory bytes are written.
func writeToLibrary<P: PrecisionType>(fileName: String, array: [P]) {
    let libraryPath = NSSearchPathForDirectoriesInDomains(.libraryDirectory, .userDomainMask, true).last ?! " library path get error "
    let filePath = libraryPath + "/" + fileName
    let fileManager = FileManager.init()
    fileManager.createFile(atPath: filePath, contents: nil, attributes: nil)
    let fileHandler = FileHandle.init(forWritingAtPath: filePath) ?! " file handler nil "
    // Close the handle on every exit path.
    defer {
        fileHandler.closeFile()
    }
    // The array's pointer is only valid inside this closure; `Data` copies the
    // bytes before the closure returns, so no dangling pointer escapes.
    let data = array.withUnsafeBufferPointer { (pointer) in
        return Data.init(buffer: pointer)
    }
    fileHandler.write(data)
}
......@@ -17,6 +17,7 @@ import Foundation
public class ResultHolder<P: PrecisionType> {
public let dim: [Int]
public let resultArr: [P]
public init(inDim: [Int], inResult: [P]) {
dim = inDim
resultArr = inResult
......@@ -56,7 +57,7 @@ public class Executor<P: PrecisionType> {
queue = inQueue
for block in inProgram.programDesc.blocks {
for i in 0..<block.ops.count {
for i in 0..<2 {
let op = block.ops[i]
do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
......@@ -79,12 +80,11 @@ public class Executor<P: PrecisionType> {
public func predict(input: MTLTexture, expect: [Int], preProcessKernle: CusomKernel? = nil) throws -> ResultHolder<P> {
public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil) throws {
guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
let resInput: MTLTexture
if let inPre = preProcessKernle {
do {
try inPre.compute(inputTexuture: input, commandBuffer: buffer)
......@@ -109,26 +109,36 @@ public class Executor<P: PrecisionType> {
buffer.addCompletedHandler { (commandbuffer) in
let inputArr = resInput.floatArray(res: { (p:P) -> P in
return p
// print(inputArr)
// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray()
// print(stridableInput)
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
for op in self.ops {
guard let outputVar = self.program.scope.output() else {
fatalError("output nil")
guard let output = outputVar as? Texture<P> else {
fatalError("output var type error")
let resultHodlder = ResultHolder<P>.init(inDim: output.dim.dims, inResult: output.metalTexture.floatArray(res: { (p:P) -> P in
return p
let afterDate = Date.init()
print(" encoder end ! time: \(afterDate.timeIntervalSince(beforeDate))")
guard let outputVar = program.scope.output() else {
throw PaddleMobileError.netError(message: "output nil")
guard let output = outputVar as? ResultHolder<P> else {
throw PaddleMobileError.netError(message: "output var type error")
return output
//public let paddle_executor: Executor = Executor.init()
......@@ -65,6 +65,7 @@ protocol OperatorProtocol {
associatedtype ParamType
associatedtype KerType: Computable where Self.KerType.ParamType == ParamType
var type: String { get }
var scope: Scope { get }
var inputs: [String : [String]] { get }
var paraInputs: [String : [String]] { get set }
var outpus: [String : [String]] { get }
......@@ -93,9 +94,11 @@ class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where
let outpus: [String : [String]]
let attrs: [String : Attr]
let para: ParamType
let scope: Scope
var kernel: KerType
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
type = opDesc.type
scope = inScope
inputs = opDesc.inputs
outpus = opDesc.outputs
attrs = opDesc.attrs
......@@ -112,12 +112,13 @@ class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKer
// para.filter.logDataPointer(header: "filter data pointer: ")
// print("filter: \(para.filter)")
print("biase: \(para.y)")
print("padding: \(para.paddings)")
print("stride: \(para.stride)")
// print("biase: \(para.y)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: true)
let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: true)
let _: P? = para.y.buffer?.logDesc(header: " biase: ", stridable: false)
let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: false)
......@@ -62,7 +62,7 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<
func delogOutput() {
// para.input.mtlTexture.logDesc()
// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true)
// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: true)
// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false)
......@@ -15,13 +15,14 @@
import Foundation
class FetchParam<P: PrecisionType>: OpParam{
var output: ResultHolder<P> = ResultHolder.init(inDim: [], inResult: [])
var output: Texture<P>
let input: Texture<P>
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = input
} catch let error {
throw error
......@@ -47,6 +48,7 @@ class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runab
typealias OpType = FetchOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
scope.setOutput(output: para.output)
......@@ -14,7 +14,38 @@
import Foundation
class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
/// Bundle of textures, buffers and convolution settings passed to
/// `ConvAddBatchNormReluKernel` when it is driven from a test.
struct ConvAddBatchNormReluTestParam: TestParam {
    // Textures
    let inputTexture: MTLTexture
    let outputTexture: MTLTexture

    // Convolution settings; `metalParam` is the only mutable member.
    var metalParam: MetalConvParam
    let filterSize: (width: Int, height: Int, channel: Int)

    // Buffers
    let filterBuffer: MTLBuffer
    let biaseBuffer: MTLBuffer
    let newScaleBuffer: MTLBuffer
    let newBiaseBuffer: MTLBuffer

    /// Stores every argument in the property of the matching name.
    init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
        self.inputTexture = inInputTexture
        self.outputTexture = inOutputTexture
        self.metalParam = inMetalParam
        self.filterSize = inFilterSize
        self.filterBuffer = inFilterBuffer
        self.biaseBuffer = inBiaseBuffer
        self.newScaleBuffer = inNewScaleBuffer
        self.newBiaseBuffer = inNewBiaseBuffer
    }
}
class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) {
if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
} else if testParam.filterSize.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
} else {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
......@@ -27,7 +58,6 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
......@@ -69,7 +99,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
print("ConvAddBatchNormReluKernel compute")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
......@@ -81,4 +111,22 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
print("ConvAddBatchNormReluKernel compute")
encoder.setTexture(param.inputTexture, index: 0)
encoder.setTexture(param.outputTexture, index: 1)
var inMetalParam = param.metalParam
encoder.setBytes(&inMetalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filterBuffer, offset: 0, index: 1)
encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2)
encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3)
encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4)
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
......@@ -81,10 +81,11 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample>
short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY);
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
......@@ -134,7 +135,9 @@ kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample>
short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY);
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
......@@ -175,7 +178,9 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY);
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
......@@ -219,7 +224,9 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access
uint output_slice = gid.z;
short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY);
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
......@@ -15,7 +15,7 @@
import Foundation
struct MetalConvParam {
public struct MetalConvParam {
let offsetX: Int16
let offsetY: Int16
let offsetZ: Int16
......@@ -15,6 +15,16 @@
import Metal
import Foundation
public protocol TestParam {
public protocol Testable {
associatedtype TestParamType: TestParam
func test(commandBuffer: MTLCommandBuffer, param: TestParamType)
init(device: MTLDevice, testParam: TestParamType)
protocol Computable {
associatedtype ParamType: OpParam
func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws
......@@ -37,8 +37,8 @@ kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
kernel void relu(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
......@@ -119,7 +119,7 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
float4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
......@@ -136,11 +136,6 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
r /= pm.ksizeX * pm.ksizeY;
// float4 r;
// r[0] = 1.0 * pm.ksizeX;
// r[1] = 2.0;
// r[2] = 3.0;
// r[3] = 4.0;
outTexture.write(r, gid.xy, gid.z);
......@@ -151,7 +146,7 @@ kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
float4 r = inTexture.read(uint2(0, 0), gid.z);
outTexture.write(r, gid.xy, gid.z);
......@@ -39,7 +39,8 @@ public struct Dim {
return dims[index];
private var dims: [Int]
private(set) var dims: [Int]
private init(){
......@@ -22,6 +22,7 @@ class InputTexture {
mtlTexture = inMTLTexture
expectDim = inExpectDim
extension InputTexture {
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册