From d81d6d11b34367c3798d0fdedcba97f0731ceccc Mon Sep 17 00:00:00 2001 From: NazgulLee Date: Fri, 26 Jul 2019 11:50:03 +0800 Subject: [PATCH] 1.optimize add by channel; 2. texture reuse should consider param input and multi fetch; 3. should also realloc heap when max size become smaller; 4. paddle mobile test disable cache (#1769) * optimize add by channel * 1. texture reuse should consider param input and multi fetch; 2. should also realloc heap when max size become smaller; 3. paddle mobile test disable cache --- .../PaddleMobileTest/TestViewController.swift | 26 ++++++++++++++----- .../paddle-mobile-metallib/Common.metal | 1 + .../ConvAddReluMetal.metal | 8 ++++-- .../paddle-mobile-metallib/Elementwise.metal | 12 ++++----- .../ElementwiseAddPreluKernel.inc.metal | 6 ++--- .../Operators/Kernels/ConvAddReluKernel.swift | 4 +-- .../Src/Operators/Kernels/ConvKernel.swift | 2 +- .../Kernels/ElementwiseAddKernel.swift | 4 +++ .../Kernels/ElementwiseAddPreluKernel.swift | 22 +--------------- .../Src/Program/MemoryOptimze.swift | 13 ++++++++-- 10 files changed, 54 insertions(+), 44 deletions(-) diff --git a/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift b/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift index d8d4c554ae..e1ec72f00a 100644 --- a/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift +++ b/metal/PaddleMobileTest/PaddleMobileTest/TestViewController.swift @@ -63,7 +63,7 @@ let device = MTLCreateSystemDefaultDevice()! let commandQueue = device.makeCommandQueue()! var timeCosts = [Double]() var count = 0 -var totalCount = 10 +var totalCount = 100 var orderedVars: [String] = [] var varIndex = 0 @@ -107,7 +107,9 @@ class TestViewController: UIViewController { } private func getTestInfo() { - Alamofire.request("\(hostUrlStr)/getTestInfo").validate().responseJSON { (response) in + var testInfoRequest = URLRequest(url: URL(string: "\(hostUrlStr)/getTestInfo")!) + testInfoRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(testInfoRequest).validate().responseJSON { (response) in guard response.result.isSuccess else { self.testLog("getTestInfo request error") return @@ -194,7 +196,9 @@ class TestViewController: UIViewController { testResult.model = model let modelUrlStr = "\(hostUrlStr)/getFile/\(model.name)" - Alamofire.request("\(modelUrlStr)/model").validate().responseData { (response) in + var modelRequest = URLRequest(url: URL(string: "\(modelUrlStr)/model")!) + modelRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(modelRequest).validate().responseData { (response) in guard response.result.isSuccess, let modelData = response.result.value else { let msg = "get model \(model.name) error" self.testLog(msg) @@ -205,7 +209,9 @@ class TestViewController: UIViewController { //let modelData2 = try! Data(contentsOf: URL(fileURLWithPath: Bundle.main.path(forResource: "yolo_model_v3_16", ofType: nil)!)) let modelPtr = UnsafeMutablePointer.allocate(capacity: modelData.count) NSData(data: modelData).getBytes(modelPtr, length: modelData.count) - Alamofire.request("\(modelUrlStr)/params/\(model.paramsPrecision)").validate().responseData(completionHandler: { (response) in + var paramsRequest = URLRequest(url: URL(string: "\(modelUrlStr)/params/\(model.paramsPrecision)")!) + paramsRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(paramsRequest).validate().responseData(completionHandler: { (response) in guard response.result.isSuccess, let paramsData = response.result.value else { let msg = "get params \(model.name) error" self.testLog(msg) @@ -244,7 +250,10 @@ class TestViewController: UIViewController { } let fetchVar = fetchVars[0] net.inputDim = Dim(inDim: [dims[0], dims[2], dims[3], dims[1]]) - Alamofire.request("\(modelUrlStr)/data/\(feedVar.name.replacingOccurrences(of: "/", with: "_"))").validate().responseData(completionHandler: { (response) in + + var feedVarRequest = URLRequest(url: URL(string: "\(modelUrlStr)/data/\(feedVar.name.replacingOccurrences(of: "/", with: "_"))")!) + feedVarRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(feedVarRequest).validate().responseData(completionHandler: { (response) in guard response.result.isSuccess, let inputData = response.result.value else { let msg = "get var \(feedVar) error" self.testLog(msg) @@ -309,7 +318,8 @@ class TestViewController: UIViewController { for i in 0.. precision && abs(a - b) / min(abs(a), abs(b)) > 0.05 { + // && abs(a - b) / min(abs(a), abs(b)) > 0.05 + if abs(a - b) > precision { isResultEqual = false msg = "unequal: i: \(i) target: \(output[i]) result: \(resultHolder.result[i])" self.testLog(msg) @@ -403,7 +413,9 @@ class TestViewController: UIViewController { } } if severVars.contains(varName) { - Alamofire.request("\(urlString)/\(varName)").validate().responseData { (response) in + var severVarRequest = URLRequest(url: URL(string: "\(urlString)/\(varName)")!) + severVarRequest.cachePolicy = .reloadIgnoringLocalAndRemoteCacheData + Alamofire.request(severVarRequest).validate().responseData { (response) in varIndex += 1 guard response.result.isSuccess, let varData = response.result.value else { self.compareVars(runner: runner, model: model, completion: completion) diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal index 185370c519..9dacf6dd86 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Common.metal @@ -109,6 +109,7 @@ inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) { struct ElementwiseAddParam { int32_t fast; + int32_t addByChannel; int32_t axis; int32_t ylen; int32_t xdim[4]; diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal index d487e00fa3..05b8150842 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddReluMetal.metal @@ -19,8 +19,10 @@ using namespace metal; half4 getBiasHalf(uint3 gid, constant ElementwiseAddParam &addParam, texture2d_array biasTexture) { half4 output; - if (addParam.fast) { + if (addParam.fast == 1) { output = biasTexture.read(gid.xy, gid.z); + } else if (addParam.addByChannel == 1) { + output = biasTexture.read(uint2(0, 0), gid.z); } else { int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; @@ -44,8 +46,10 @@ half4 getBiasHalf(uint3 gid, constant ElementwiseAddParam &addParam, texture2d_a float4 getBias(uint3 gid, constant ElementwiseAddParam &addParam, texture2d_array biasTexture) { float4 output; - if (addParam.fast) { + if (addParam.fast == 1) { output = float4(biasTexture.read(gid.xy, gid.z)); + } else if (addParam.addByChannel == 1) { + output = float4(biasTexture.read(uint2(0, 0), gid.z)); } else { int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal index 45559cb0e8..1748eadb79 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/Elementwise.metal @@ -26,12 +26,12 @@ kernel void elementwise_add(texture2d_array inputX [[textur gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; float4 rx, ry; - + rx = inputX.read(gid.xy, gid.z); if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); + } else if (pm.addByChannel == 1) { + ry = inputY.read(uint2(0, 0), gid.z); } else { - rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; @@ -62,12 +62,12 @@ kernel void elementwise_add_half(texture2d_array inputX [[te gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; half4 rx, ry; - + rx = inputX.read(gid.xy, gid.z); if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); + } else if (pm.addByChannel == 1) { + ry = inputY.read(uint2(0, 0), gid.z); } else { - rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; diff --git a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal index 65566952ef..bed8763f36 100644 --- a/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal +++ b/metal/paddle-mobile-metallib/paddle-mobile-metallib/ElementwiseAddPreluKernel.inc.metal @@ -37,12 +37,12 @@ kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; VECTOR(P, 4) rx, ry; - + rx = inputX.read(gid.xy, gid.z); if (pm.fast == 1) { - rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); + } else if (pm.addByChannel == 1) { + ry = inputY.read(uint2(0, 0), gid.z); } else { - rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift index 28b78cfe1c..9bdf2c7d42 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift @@ -160,7 +160,7 @@ class ConvAddReluKernel: Kernel, Computable { try setupWithMPS(device: device, param: param) } else { if functionName == nil { - fatalError(" unsupport yet ") + throw PaddleMobileError.makeError(type: .netError, msg: "function name nil") } try super.init(device: device, inFunctionName: functionName, initContext: initContext) try setupWithoutMPS(device: device, param: param) @@ -371,7 +371,7 @@ class ConvAddReluKernel: Kernel, Computable { } private class func canMPSAddByElement(param: ConvAddReluParam

) -> Bool { - if let y = param.y, y.dim.dims == param.input.dim.dims { + if let y = param.y, y.dim.dims == param.output.dim.dims { return true } return false diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift index 8255790ac1..cb9f09b81c 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvKernel.swift @@ -46,7 +46,7 @@ class ConvKernel: Kernel, Computable { try setupWithMPS(device: device, param: param) } else { if functionName == nil { - fatalError(" unsupport yet ") + throw PaddleMobileError.makeError(type: .netError, msg: "function name nil") } try super.init(device: device, inFunctionName: functionName, initContext: initContext) try setupWithoutMPS(device: device, param: param) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift index 15a4e80768..168786e02a 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddKernel.swift @@ -16,6 +16,7 @@ import Foundation struct ElementwiseAddMetalParam { var fast: Int32 = 0 + var addByChannel: Int32 = 0 var axis: Int32 = 0 var ylen: Int32 = 0 var xdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) @@ -91,6 +92,9 @@ class ElementwiseAddKernel: Kernel, Computable { // print("===> elementwise_add fast!!!") metalParam.fast = 1 } + if inputY.tensorDim.cout() == 1 && (axis == 1 || (axis == -1 && inputY.tensorDim.dims[0] == inputX.padToFourDim[1])) { + metalParam.addByChannel = 1 + } return metalParam } } diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift index ca4892d874..d266f55605 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ElementwiseAddPreluKernel.swift @@ -23,27 +23,7 @@ class ElementwiseAddPreluKernel: Kernel, Computable { try param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision) - metalParam = ElementwiseAddMetalParam.init() - - let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) } - let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) } - let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) } - let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) } - - metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3]) - metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3]) - metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3]) - metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3]) - if param.axis == -1 { - metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout()) - } else { - metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) - } - metalParam.ylen = Int32(param.inputY.tensorDim.cout()) - if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { - // print("===> elementwise_add fast!!!") - metalParam.fast = 1 - } + metalParam = ElementwiseAddKernel

.metalParamFrom(inputX: param.inputX, inputY: param.inputY, axis: param.axis) if GlobalConfig.shared.computePrecision == .Float32 { if param.mode == "channel" { diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift index e52e1e44d1..732296493e 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/MemoryOptimze.swift @@ -66,6 +66,7 @@ class MemoryOptimize: MemoryManager { var createdNodes = [String: Node]() var nodesArray = [Node]() let scope = program.scope + var fetchVarNames: [String] = [] func appendNodes(textureDic: [String: [String]], varsDic: [String: PMVarDesc]) { for dicPair in textureDic { for varName in dicPair.value { @@ -94,9 +95,16 @@ class MemoryOptimize: MemoryManager { varsDic[varDesc.name] = varDesc } for op in block.ops { + if op.type == gFetchType { + for names in op.inputs.values { + fetchVarNames.append(contentsOf: names) + } + } appendNodes(textureDic: op.inputs, varsDic: varsDic) + appendNodes(textureDic: op.paraInputs, varsDic: varsDic) appendNodes(textureDic: op.outputs, varsDic: varsDic) appendNodes(textureDic: op.inputs, varsDic: varsDic) + appendNodes(textureDic: op.paraInputs, varsDic: varsDic) } } var nodeGroups: [[Node]] = [] @@ -106,7 +114,8 @@ class MemoryOptimize: MemoryManager { node.visited = true var placed = false for i in 0.. (heap?.size ?? 0) { + if size != (heap?.size ?? 0) { heap?.setPurgeableState(.empty) heap = makeHeapForSize(size) } -- GitLab