From caec10b7f7ec60df4f46090967ab5a00cfba1e5a Mon Sep 17 00:00:00 2001 From: dolphin8 Date: Tue, 10 Jul 2018 16:12:34 +0800 Subject: [PATCH] batch norm --- .../paddle-mobile/Operators/BatchNormOp.swift | 5 +++ .../Operators/Kernels/BatchNormKernel.swift | 35 ++++++++++++++++++- .../Operators/Kernels/Kernels.metal | 11 +++--- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift index 3b45d97c30..3761dad60f 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift @@ -48,6 +48,11 @@ class BatchNormOp: Operator, BatchNormParam } typealias OpType = BatchNormOp

/// Encodes this op's batch-norm kernel into the given command buffer.
/// - Parameters:
///   - device: Metal device (unused here; kept for the Operator interface).
///   - buffer: command buffer the kernel work is encoded into.
/// - Throws: whatever `BatchNormKernel.compute` throws (e.g. when the
///   encoder cannot be created).
/// NOTE(review): the original wrapped the call in
/// `do { try ... } catch let error { throw error }`, which is a no-op
/// rethrow — a bare `try` propagates the error identically.
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
    try kernel.compute(commandBuffer: buffer, param: para)
}

/// GPU batch-normalization kernel.
///
/// At construction time the per-channel parameters are fused on the CPU so
/// the shader is a single multiply-add per texel:
///     out = in * newScale + newBias
/// where
///     newScale = gamma / sqrt(variance + epsilon)
///     newBias  = beta  - mean * newScale
///
/// NOTE(review): the generic parameter list was stripped from the patch in
/// transit — assumed to be `<P: PrecisionType>` as elsewhere in
/// paddle-mobile; confirm against the repository.
class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
    // Fused per-channel scale/bias consumed by the "batchnorm" shader.
    let newScale: MTLBuffer
    let newBias: MTLBuffer

    required init(device: MTLDevice, param: BatchNormParam<P>) {
        let varianceBuffer = param.inputVariance.buffer
        // BUG FIX: element count is byte length / stride, NOT the raw byte
        // length — the original allocated `length` Float32s (4x too many)
        // and iterated past the intended channel count.
        let channelCount = varianceBuffer.length / MemoryLayout<P>.stride
        var invStd: [Float32] = Array(repeating: 0, count: channelCount)
        let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self)
        for i in 0..<channelCount {
            // 1 / sqrt(var + eps), computed in Float32 for stability.
            invStd[i] = 1 / Float32(varianceContents[i] + param.epsilon).squareRoot()
        }

        // BUG FIX: the original declared *local* `var newScale/newBias`,
        // shadowing the stored properties, which were never initialized
        // (a compile error). Also `makeBuffer` takes a `length:` label and
        // returns an Optional — fail fast if allocation is refused.
        guard let scaleBuf = device.makeBuffer(length: param.inputScale.buffer.length),
              let biasBuf = device.makeBuffer(length: param.inputBias.buffer.length) else {
            fatalError("BatchNormKernel: failed to allocate newScale/newBias buffers")
        }
        newScale = scaleBuf
        newBias = biasBuf

        let newScaleContents = scaleBuf.contents().assumingMemoryBound(to: P.self)
        let newBiasContents = biasBuf.contents().assumingMemoryBound(to: P.self)
        let scale = param.inputScale.buffer
        let scaleContents = scale.contents().assumingMemoryBound(to: P.self)
        let biasContents = param.inputBias.buffer.contents().assumingMemoryBound(to: P.self)
        let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self)

        // BUG FIX: original read `scaleContents.lengh` — a typo, and raw
        // pointers have no length anyway. Use the buffer's byte length.
        for i in 0..<(scale.length / MemoryLayout<P>.stride) {
            newScaleContents[i] = invStd[i] * scaleContents[i]
            newBiasContents[i] = biasContents[i] - meanContents[i] * invStd[i] * scaleContents[i]
        }
        super.init(device: device, inFunctionName: "batchnorm")
    }

    /// Encodes one batch-norm dispatch: input texture 0, output texture 1,
    /// fused scale/bias in buffers 0 and 1 (matching the shader bindings).
    /// - Throws: `PaddleMobileError.predictError` when no encoder can be made.
    func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam<P>) throws {
        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
            throw PaddleMobileError.predictError(message: " encoder is nil")
        }
        // BUG FIX: dropped the leftover debug `print("BatchNorm compute")`.
        encoder.setTexture(param.input.metalTexture, index: 0)
        encoder.setTexture(param.output.metalTexture, index: 1)
        encoder.setBuffer(newScale, offset: 0, index: 0)
        encoder.setBuffer(newBias, offset: 0, index: 1)
        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
        encoder.endEncoding()
    }
}
// Batch normalization: out = in * newScale + newBias, per array slice.
// newScale/newBias are the CPU-fused per-channel parameters bound by
// BatchNormKernel.compute at buffer indices 0 and 1.
// NOTE(review): the texture template arguments were stripped from the patch
// in transit — <half, access::read>/<half, access::write> assumed to match
// the other kernels in this file; confirm against the repository.
kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)]],
                      texture2d_array<half, access::write> outTexture [[texture(1)]],
                      const device half4 *newScale [[buffer(0)]],
                      const device half4 *newBias [[buffer(1)]],
                      uint3 gid [[thread_position_in_grid]]) {
    // Guard threads dispatched beyond the output extent.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;
    const half4 input = inTexture.read(gid.xy, gid.z);
    half4 output = input * newScale[gid.z] + newBias[gid.z];
    // BUG FIX: the patch computed `output` but wrote `input` back,
    // making the normalization a no-op.
    outTexture.write(output, gid.xy, gid.z);
}