From cb945cebbad7a4809c7ada5f1022dee2a0b10101 Mon Sep 17 00:00:00 2001 From: Yanzhan Yang Date: Thu, 9 May 2019 23:16:50 +0800 Subject: [PATCH] add useAggresiveOptimization param to control newly added optimization code branch (#1612) --- metal/paddle-mobile/paddle-mobile/API/Net.swift | 3 +++ metal/paddle-mobile/paddle-mobile/API/Runner.swift | 1 + .../Src/Operators/Base/Operator.swift | 3 +++ .../Src/Operators/Kernels/ConvAddKernel.swift | 11 ++++++----- .../Src/Operators/Kernels/ConvAddReluKernel.swift | 14 +++++++++----- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/metal/paddle-mobile/paddle-mobile/API/Net.swift b/metal/paddle-mobile/paddle-mobile/API/Net.swift index fadc6fb60c..aa6b43e9bf 100644 --- a/metal/paddle-mobile/paddle-mobile/API/Net.swift +++ b/metal/paddle-mobile/paddle-mobile/API/Net.swift @@ -59,6 +59,9 @@ import Foundation /// 是否使用 MetalPerformanceShaders 进行运算, 运算精度为 32 位时不支持开启 MPS @objc public var useMPS: Bool = false + /// 是否使用最高等级的加速策略 + @objc public var useAggressiveOptimization: Bool = false + /// 模型精度 @objc public var paramPrecision: Precision = .Float32 diff --git a/metal/paddle-mobile/paddle-mobile/API/Runner.swift b/metal/paddle-mobile/paddle-mobile/API/Runner.swift index c2f6521075..730acd5947 100644 --- a/metal/paddle-mobile/paddle-mobile/API/Runner.swift +++ b/metal/paddle-mobile/paddle-mobile/API/Runner.swift @@ -95,6 +95,7 @@ import Foundation initContext.metalLoadMode = net.metalLoadMode initContext.metalLibPath = net.metalLibPath initContext.useMPS = net.useMPS + initContext.useAggresiveOptimization = net.useAggressiveOptimization switch net.paramPrecision { case .Float16: diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift index 32f044c53e..85474cb5a9 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift @@ -70,6 
+70,9 @@ public class InitContext { /// 是否使用 MetalPerformanceShaders 进行运算 var useMPS: Bool = false + /// 是否使用最高等级的加速策略 + var useAggresiveOptimization: Bool = false + init() { metalLoadMode = .LoadMetalInDefaultLib metalLibPath = nil diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddKernel.swift index 155a5b7841..e4fa5b1d67 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddKernel.swift @@ -110,9 +110,11 @@ class ConvAddKernel: Kernel, Computable { } var shouldUseMPS = false - let functionName = type(of: self).kernelFunctionName(param: param) - if #available(iOS 11.0, *), initContext.useMPS { - shouldUseMPS = true + let functionName = type(of: self).kernelFunctionName(param: param, useAggressiveOptimization: initContext.useAggresiveOptimization) + if #available(iOS 11.0, *), (initContext.useMPS || initContext.useAggresiveOptimization) { + if (param.input.tensorDim[1] == 1 || param.input.tensorDim[1] > 4) && (param.output.tensorDim[1] == 1 || param.output.tensorDim[1] > 4) { + shouldUseMPS = true + } } if type(of: self).isWinoGrad(functionName: functionName) { shouldUseMPS = false @@ -121,7 +123,6 @@ class ConvAddKernel: Kernel, Computable { super.init(device: device, inFunctionName: nil, initContext: initContext) setupWithMPS(device: device, param: param) } else { - if functionName == nil { fatalError(" unsupport yet ") } @@ -203,7 +204,7 @@ class ConvAddKernel: Kernel, Computable { param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision) } - open class func kernelFunctionName(param: ConvAddParam

<P>) -> String? { + open class func kernelFunctionName(param: ConvAddParam<P>, useAggressiveOptimization: Bool = false) -> String? { if GlobalConfig.shared.computePrecision == .Float16 { if param.filter.width == 1 && param.filter.height == 1 { return "conv_add_1x1_half" diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift index 13843fd846..fc43a6c17e 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddReluKernel.swift @@ -10,16 +10,20 @@ import Foundation import MetalPerformanceShaders class ConvAddReluKernel<P: PrecisionProtocol>: ConvAddKernel<P> { - override class func kernelFunctionName(param: ConvAddParam<P>) -> String? { + override class func kernelFunctionName(param: ConvAddParam<P>
, useAggressiveOptimization: Bool = false) -> String? { if GlobalConfig.shared.computePrecision == .Float16 { if param.filter.width == 1 && param.filter.height == 1 { return "conv_add_relu_1x1_half" } else if param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1] { - if param.filter.width == 3 && param.filter.height == 3 && param.stride[0] == 1 && param.stride[1] == 1 && param.filter.n == 16 { - return "depthwise_conv_add_relu_3x3_half_winograd" - } else { - return "depthwise_conv_add_relu_3x3_half" + if useAggressiveOptimization { + let couldUseWinograd = param.filter.width == 3 && param.filter.height == 3 + && param.filter.n == 16 && param.stride[0] == 1 && param.stride[1] == 1 + && param.dilations[0] == 1 && param.dilations[1] == 1 + if couldUseWinograd { + return "depthwise_conv_add_relu_3x3_half_winograd" + } } + return "depthwise_conv_add_relu_3x3_half" } else if param.filter.width == 3 && param.filter.height == 3 { return "conv_add_relu_3x3_half" } else if param.filter.width == 1 && param.filter.height == 5 { -- GitLab