diff --git a/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift b/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift index d8d4c554aeda2b5700203fc13b2384f29b795b76..e1ec72f00abbcc7e474d5691dc2f62e2a40f454f 100644 --- a/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift +++ b/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift @@ -63,7 +63,7 @@ let device = MTLCreateSystemDefaultDevice()! let commandQueue = device.makeCommandQueue()! var timeCosts = [Double]() var count = 0 -var totalCount = 10 +var totalCount = 100 var orderedVars: [String] = [] var varIndex = 0 @@ -107,7 +107,9 @@ class TestViewController: UIViewController { } private func getTestInfo() { - Alamofire.request("\(hostUrlStr)/getTestInfo").validate().responseJSON { (response) in + var testInfoRequest = URLRequest(url: URL(string: "\(hostUrlStr)/getTestInfo")!) + testInfoRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(testInfoRequest).validate().responseJSON { (response) in guard response.result.isSuccess else { self.testLog("getTestInfo request error") return @@ -194,7 +196,9 @@ class TestViewController: UIViewController { testResult.model = model let modelUrlStr = "\(hostUrlStr)/getFile/\(model.name)" - Alamofire.request("\(modelUrlStr)/model").validate().responseData { (response) in + var modelRequest = URLRequest(url: URL(string: "\(modelUrlStr)/model")!) + modelRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(modelRequest).validate().responseData { (response) in guard response.result.isSuccess, let modelData = response.result.value else { let msg = "get model \(model.name) error" self.testLog(msg) @@ -205,7 +209,9 @@ class TestViewController: UIViewController { //let modelData2 = try! Data(contentsOf: URL(fileURLWithPath: Bundle.main.path(forResource: "yolo_model_v3_16", ofType: nil)!)) let modelPtr = UnsafeMutablePointer.allocate(capacity: modelData.count) NSData(data: modelData).getBytes(modelPtr, length: modelData.count) - Alamofire.request("\(modelUrlStr)/params/\(model.paramsPrecision)").validate().responseData(completionHandler: { (response) in + var paramsRequest = URLRequest(url: URL(string: "\(modelUrlStr)/params/\(model.paramsPrecision)")!) + paramsRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(paramsRequest).validate().responseData(completionHandler: { (response) in guard response.result.isSuccess, let paramsData = response.result.value else { let msg = "get params \(model.name) error" self.testLog(msg) @@ -244,7 +250,10 @@ class TestViewController: UIViewController { } let fetchVar = fetchVars[0] net.inputDim = Dim(inDim: [dims[0], dims[2], dims[3], dims[1]]) - Alamofire.request("\(modelUrlStr)/data/\(feedVar.name.replacingOccurrences(of: "/", with: "_"))").validate().responseData(completionHandler: { (response) in + + var feedVarRequest = URLRequest(url: URL(string: "\(modelUrlStr)/data/\(feedVar.name.replacingOccurrences(of: "/", with: "_"))")!) + feedVarRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(feedVarRequest).validate().responseData(completionHandler: { (response) in guard response.result.isSuccess, let inputData = response.result.value else { let msg = "get var \(feedVar) error" self.testLog(msg) @@ -309,7 +318,8 @@ class TestViewController: UIViewController { for i in 0.. precision && abs(a - b) / min(abs(a), abs(b)) > 0.05 { + // && abs(a - b) / min(abs(a), abs(b)) > 0.05 + if abs(a - b) > precision { isResultEqual = false msg = "unequal: i: \(i) target: \(output[i]) result: \(resultHolder.result[i])" self.testLog(msg) @@ -403,7 +413,9 @@ class TestViewController: UIViewController { } } if severVars.contains(varName) { - Alamofire.request("\(urlString)/\(varName)").validate().responseData { (response) in + var severVarRequest = URLRequest(url: URL(string: "\(urlString)/\(varName)")!) + severVarRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(severVarRequest).validate().responseData { (response) in varIndex += 1 guard response.result.isSuccess, let varData = response.result.value else { self.compareVars(runner: runner, model: model, completion: completion) diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal index 185370c519df8e07317ed01469c6a710587307e8..9dacf6dd861f94d8f67956d09c0ae1d9a111a362 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal @@ -109,6 +109,7 @@ inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) { struct ElementwiseAddParam { int32_t fast; + int32_t addByChannel; int32_t axis; int32_t ylen; int32_t xdim[4]; diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal index d487e00fa3b251f271ce85df2c58a0ec088fe46a..05b8150842c8acceebdc75eeadeafa24cd89b088 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal @@ -19,8 +19,10 @@ using namespace metal; half4 getBiasHalf(uint3 gid, constant ElementwiseAddParam &addParam, texture2d_array biasTexture) { half4 output; - if (addParam.fast) { + if (addParam.fast == 1) { output = biasTexture.read(gid.xy, gid.z); + } else if (addParam.addByChannel == 1) { + output = biasTexture.read(uint2(0, 0), gid.z); } else { int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; @@ -44,8 +46,10 @@ half4 getBiasHalf(uint3 gid, constant ElementwiseAddParam &addParam, texture2d_a float4 getBias(uint3 gid, constant ElementwiseAddParam &addParam, texture2d_array biasTexture) { float4 output; - if (addParam.fast) { + if (addParam.fast == 1) { output = float4(biasTexture.read(gid.xy, gid.z)); + } else if (addParam.addByChannel == 1) { + output = float4(biasTexture.read(uint2(0, 0), gid.z)); } else { int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal index 45559cb0e809050e869e77ebd472f2eaff0c5871..1748eadb79fdf1ededb3dc7f809420d10cf3c769 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal @@ -26,12 +26,12 @@ kernel void elementwise_add(texture2d_array inputX [[textur gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; float4 rx, ry; - + rx = inputX.read(gid.xy, gid.z); if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); + } else if (pm.addByChannel == 1) { + ry = inputY.read(uint2(0, 0), gid.z); } else { - rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; @@ -62,12 +62,12 @@ kernel void elementwise_add_half(texture2d_array inputX [[te gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; half4 rx, ry; - + rx = inputX.read(gid.xy, gid.z); if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); + } else if (pm.addByChannel == 1) { + ry = inputY.read(uint2(0, 0), gid.z); } else { - rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal index 65566952efa5a30c8601e751cbfb0ac6ccf21464..bed8763f362fbca62b9279ec9abb30e87e8c3f03 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal @@ -37,12 +37,12 @@ kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; VECTOR(P, 4) rx, ry; - + rx = inputX.read(gid.xy, gid.z); if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); + } else if (pm.addByChannel == 1) { + ry = inputY.read(uint2(0, 0), gid.z); } else { - rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift index 28b78cfe1c5ae64a8e2430cddb18d556d357ffac..9bdf2c7d42560be781782280b2a1a30793753649 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift @@ -160,7 +160,7 @@ class ConvAddReluKernel: Kernel, Computable { try setupWithMPS(device: device, param: param) } else { if functionName == nil { - fatalError(" unsupport yet ") + throw PaddleMobileError.makeError(type: .netError, msg: "function name nil") } try super.init(device: device, inFunctionName: functionName, initContext: initContext) try setupWithoutMPS(device: device, param: param) @@ -371,7 +371,7 @@ class ConvAddReluKernel: Kernel, Computable { } private class func canMPSAddByElement(param: ConvAddReluParam

) -> Bool { - if let y = param.y, y.dim.dims == param.input.dim.dims { + if let y = param.y, y.dim.dims == param.output.dim.dims { return true } return false diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift index 8255790ac1eace20b9d04d43105cd39b6b1deced..cb9f09b81c989d1a708648500c5d387e22f297a0 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift @@ -46,7 +46,7 @@ class ConvKernel: Kernel, Computable { try setupWithMPS(device: device, param: param) } else { if functionName == nil { - fatalError(" unsupport yet ") + throw PaddleMobileError.makeError(type: .netError, msg: "function name nil") } try super.init(device: device, inFunctionName: functionName, initContext: initContext) try setupWithoutMPS(device: device, param: param) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift index 15a4e80768ab432520652c205d8fe54028d61fc5..168786e02a2c0e20acd68e1003375ee5da345b28 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift @@ -16,6 +16,7 @@ import Foundation struct ElementwiseAddMetalParam { var fast: Int32 = 0 + var addByChannel: Int32 = 0 var axis: Int32 = 0 var ylen: Int32 = 0 var xdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) @@ -91,6 +92,9 @@ class ElementwiseAddKernel: Kernel, Computable { // print("===> elementwise_add fast!!!") metalParam.fast = 1 } + if inputY.tensorDim.cout() == 1 && (axis == 1 || (axis == -1 && inputY.tensorDim.dims[0] == inputX.padToFourDim[1])) { + metalParam.addByChannel = 1 + } return metalParam } } diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift index ca4892d874cac8daaabb864d73c53f880b8ed2f1..d266f5560584ffc7f97a240b7ea0bbf06c3f91fa 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift @@ -23,27 +23,7 @@ class ElementwiseAddPreluKernel: Kernel, Computable { try param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision) - metalParam = ElementwiseAddMetalParam.init() - - let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) } - let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) } - let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) } - let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) } - - metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3]) - metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3]) - metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3]) - metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3]) - if param.axis == -1 { - metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout()) - } else { - metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) - } - metalParam.ylen = Int32(param.inputY.tensorDim.cout()) - if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { - // print("===> elementwise_add fast!!!") - metalParam.fast = 1 - } + metalParam = ElementwiseAddKernel

.metalParamFrom(inputX: param.inputX, inputY: param.inputY, axis: param.axis) if GlobalConfig.shared.computePrecision == .Float32 { if param.mode == "channel" { diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift index e52e1e44d18c290cef8f16ae4405774af56c7074..732296493e00d545212a7d1492fcf0e8fc32b653 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift @@ -66,6 +66,7 @@ class MemoryOptimize: MemoryManager { var createdNodes = [String: Node]() var nodesArray = [Node]() let scope = program.scope + var fetchVarNames: [String] = [] func appendNodes(textureDic: [String: [String]], varsDic: [String: PMVarDesc]) { for dicPair in textureDic { for varName in dicPair.value { @@ -94,9 +95,16 @@ class MemoryOptimize: MemoryManager { varsDic[varDesc.name] = varDesc } for op in block.ops { + if op.type == gFetchType { + for names in op.inputs.values { + fetchVarNames.append(contentsOf: names) + } + } appendNodes(textureDic: op.inputs, varsDic: varsDic) + appendNodes(textureDic: op.paraInputs, varsDic: varsDic) appendNodes(textureDic: op.outputs, varsDic: varsDic) appendNodes(textureDic: op.inputs, varsDic: varsDic) + appendNodes(textureDic: op.paraInputs, varsDic: varsDic) } } var nodeGroups: [[Node]] = [] @@ -106,7 +114,8 @@ class MemoryOptimize: MemoryManager { node.visited = true var placed = false for i in 0.. (heap?.size ?? 0) { + if size != (heap?.size ?? 0) { heap?.setPurgeableState(.empty) heap = makeHeapForSize(size) }