Commit cb945ceb authored by Yanzhan Yang, committed by GitHub

add useAggresiveOptimization param to control newly added optimization code branch (#1612)

Parent 048a8ebe
@@ -59,6 +59,9 @@ import Foundation
     /// Whether to use MetalPerformanceShaders for computation; MPS cannot be enabled when the compute precision is 32-bit
     @objc public var useMPS: Bool = false
+    /// Whether to use the highest-level acceleration strategy
+    @objc public var useAggressiveOptimization: Bool = false
     /// Model precision
     @objc public var paramPrecision: Precision = .Float32
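For context, opting in from application code would look roughly like the sketch below. This is a minimal illustration, not part of this commit: `MobileNet` stands in for whatever `Net` subclass the host app defines, and the module name in the import is assumed.

import paddle_mobile  // assumed module name for the iOS library

// Hypothetical Net subclass standing in for a real model definition.
let net = MobileNet()
net.useMPS = true                      // allow MetalPerformanceShaders where supported
net.useAggressiveOptimization = true   // opt in to the new kernel-selection branch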
@@ -95,6 +95,7 @@ import Foundation
         initContext.metalLoadMode = net.metalLoadMode
         initContext.metalLibPath = net.metalLibPath
         initContext.useMPS = net.useMPS
+        initContext.useAggresiveOptimization = net.useAggressiveOptimization
         switch net.paramPrecision {
         case .Float16:
@@ -70,6 +70,9 @@ public class InitContext {
     /// Whether to use MetalPerformanceShaders for computation
     var useMPS: Bool = false
+    /// Whether to use the highest-level acceleration strategy
+    var useAggresiveOptimization: Bool = false
     init() {
         metalLoadMode = .LoadMetalInDefaultLib
         metalLibPath = nil
@@ -110,9 +110,11 @@ class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
         }
         var shouldUseMPS = false
-        let functionName = type(of: self).kernelFunctionName(param: param)
-        if #available(iOS 11.0, *), initContext.useMPS {
-            shouldUseMPS = true
+        let functionName = type(of: self).kernelFunctionName(param: param, useAggressiveOptimization: initContext.useAggresiveOptimization)
+        if #available(iOS 11.0, *), (initContext.useMPS || initContext.useAggresiveOptimization) {
+            if (param.input.tensorDim[1] == 1 || param.input.tensorDim[1] > 4) && (param.output.tensorDim[1] == 1 || param.output.tensorDim[1] > 4) {
+                shouldUseMPS = true
+            }
         }
         if type(of: self).isWinoGrad(functionName: functionName) {
             shouldUseMPS = false
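The new channel gate is worth spelling out: even when MPS (or aggressive optimization) is requested, the MPS path is only taken when both the input and output channel counts (`tensorDim[1]` in NCHW layout) are either 1 or greater than 4. A minimal sketch of the same predicate, as a hypothetical helper that is not part of this commit:

// Hypothetical helper mirroring the tensorDim[1] checks in ConvAddKernel:
// channel counts of 2 through 4 are excluded from the MPS path.
func channelCountSuitsMPS(_ channels: Int) -> Bool {
    return channels == 1 || channels > 4
}

// shouldUseMPS is set only when both sides pass the gate:
// channelCountSuitsMPS(inputChannels) && channelCountSuitsMPS(outputChannels)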
@@ -121,7 +123,6 @@ class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
             super.init(device: device, inFunctionName: nil, initContext: initContext)
             setupWithMPS(device: device, param: param)
         } else {
-
             if functionName == nil {
                 fatalError(" unsupport yet ")
             }
@@ -203,7 +204,7 @@ class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
         param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
     }
-    open class func kernelFunctionName(param: ConvAddParam<P>) -> String? {
+    open class func kernelFunctionName(param: ConvAddParam<P>, useAggressiveOptimization: Bool = false) -> String? {
         if GlobalConfig.shared.computePrecision == .Float16 {
             if param.filter.width == 1 && param.filter.height == 1 {
                 return "conv_add_1x1_half"
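Note that `useAggressiveOptimization` is added with a default value of `false`, so existing call sites of `kernelFunctionName(param:)` compile unchanged; only the kernel initializer shown above forwards the flag from `InitContext`. A hedged illustration of that compatibility (the `Float32` specialization and this wrapper function are assumptions, not code from the commit):

// Sketch: how the defaulted parameter preserves source compatibility.
// ConvAddParam, ConvAddKernel, and InitContext come from the library;
// this helper itself is hypothetical.
func selectKernelNames(param: ConvAddParam<Float32>, initContext: InitContext) -> (String?, String?) {
    // Existing call sites compile unchanged and get the conservative path:
    let conservative = ConvAddKernel<Float32>.kernelFunctionName(param: param)
    // New call sites forward the flag from InitContext:
    let aggressive = ConvAddKernel<Float32>.kernelFunctionName(
        param: param, useAggressiveOptimization: initContext.useAggresiveOptimization)
    return (conservative, aggressive)
}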
@@ -10,16 +10,20 @@ import Foundation
 import MetalPerformanceShaders

 class ConvAddReluKernel<P: PrecisionProtocol>: ConvAddKernel<P> {
-    override class func kernelFunctionName(param: ConvAddParam<P>) -> String? {
+    override class func kernelFunctionName(param: ConvAddParam<P>, useAggressiveOptimization: Bool = false) -> String? {
         if GlobalConfig.shared.computePrecision == .Float16 {
             if param.filter.width == 1 && param.filter.height == 1 {
                 return "conv_add_relu_1x1_half"
             } else if param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1] {
-                if param.filter.width == 3 && param.filter.height == 3 && param.stride[0] == 1 && param.stride[1] == 1 && param.filter.n == 16 {
-                    return "depthwise_conv_add_relu_3x3_half_winograd"
-                } else {
-                    return "depthwise_conv_add_relu_3x3_half"
+                if useAggressiveOptimization {
+                    let couldUseWinograd = param.filter.width == 3 && param.filter.height == 3
+                        && param.filter.n == 16 && param.stride[0] == 1 && param.stride[1] == 1
+                        && param.dilations[0] == 1 && param.dilations[1] == 1
+                    if couldUseWinograd {
+                        return "depthwise_conv_add_relu_3x3_half_winograd"
+                    }
                 }
+                return "depthwise_conv_add_relu_3x3_half"
             } else if param.filter.width == 3 && param.filter.height == 3 {
                 return "conv_add_relu_3x3_half"
             } else if param.filter.width == 1 && param.filter.height == 5 {
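The refactored condition makes the Winograd eligibility explicit: the specialized `depthwise_conv_add_relu_3x3_half_winograd` kernel is now selected only under aggressive optimization, and only for a 3x3 depthwise filter with 16 output channels, unit strides, and unit dilations (the dilation check is new in this commit). The same test as a hypothetical standalone helper, purely for illustration:

// Hypothetical extraction of the couldUseWinograd test above.
func canUseDepthwiseWinograd(filterWidth: Int, filterHeight: Int, filterN: Int,
                             strides: [Int], dilations: [Int]) -> Bool {
    return filterWidth == 3 && filterHeight == 3 && filterN == 16
        && strides[0] == 1 && strides[1] == 1
        && dilations[0] == 1 && dilations[1] == 1
}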