f

a1940b00 · dolphin8 · d3d811af · a1940b00 · a1940b00 · a1940b00
8 changed files
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
@@ -7,6 +7,8 @@
 	objects = {

 /* Begin PBXBuildFile section */
+		4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928762133F1DB005B6C3A /* BoxCoder.metal */; };
+		4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; };
 		D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; };
 		FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; };
@@ -85,6 +87,8 @@
 /* End PBXBuildFile section */

 /* Begin PBXFileReference section */
+		4AF928762133F1DB005B6C3A /* BoxCoder.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.metal; sourceTree = "<group>"; };
+		4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = "<group>"; };
 		CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = "<group>"; };
 		DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
 		E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = "<group>"; };
@@ -349,6 +353,8 @@
 			children = (
 				FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
 				FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
+				4AF928762133F1DB005B6C3A /* BoxCoder.metal */,
+				4AF9287821341661005B6C3A /* Softmax.metal */,
 				FCEB6849212F00DB00D2448E /* PreluKernel.metal */,
 				FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */,
 				FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */,
@@ -461,6 +467,7 @@
 			files = (
 				FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */,
 				FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
+				4AF9287921341661005B6C3A /* Softmax.metal in Sources */,
 				FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */,
 				FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */,
 				FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */,
@@ -478,6 +485,7 @@
 				FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
 				FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
 				FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
+				4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
 				FCEB684C212F093800D2448E /* PreluOp.swift in Sources */,
 				FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */,
 				FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */,

--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift
@@ -14,20 +14,39 @@

 import Foundation

+/// Scalar arguments passed to the `softmax` Metal kernel through buffer index 0.
+///
+/// Field order and 32-bit width match the `SoftmaxParam` struct in `Softmax.metal`;
+/// the two declarations must be changed together.
+struct SoftmaxMetalParam {
+  /// Element 0 of the input's `tensorDim` (not read by the float kernel in `Softmax.metal`).
+  let N: Int32
+  /// Element 1 of the input's `tensorDim`; the kernel reduces over this many values.
+  let K: Int32
+}
+
 class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
-    
-    func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam<P>) throws {
-        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
-            throw PaddleMobileError.predictError(message: " encoder is nil")
-        }
-        encoder.setTexture(param.input.metalTexture, index: 0)
-        encoder.setTexture(param.output.metalTexture, index: 1)
-        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
-        encoder.endEncoding()
+  /// Encodes one softmax pass into `commandBuffer`.
+  ///
+  /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+  func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam<P>) throws {
+    guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+      throw PaddleMobileError.predictError(message: " encoder is nil")
    }
+    // Texture slot 0 is read by the kernel, slot 1 is written.
+    encoder.setTexture(param.input.metalTexture, index: 0)
+    encoder.setTexture(param.output.metalTexture, index: 1)
    
-    required init(device: MTLDevice, param: SoftmaxParam<P>) {
-        param.output.initTexture(device: device)
-        super.init(device: device, inFunctionName: "softmax")
-    }
+    let dims = param.input.tensorDim
+    var kernelArgs = SoftmaxMetalParam(N: Int32(dims[0]), K: Int32(dims[1]))
+    encoder.setBytes(&kernelArgs, length: MemoryLayout<SoftmaxMetalParam>.size, index: 0)
+    encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+    encoder.endEncoding()
+  }
+  
+  /// Calls `initTexture(device:)` on the output and forwards the Metal function
+  /// name "softmax" to the superclass initialiser.
+  required init(device: MTLDevice, param: SoftmaxParam<P>) {
+    param.output.initTexture(device: device)
+    super.init(device: device, inFunctionName: "softmax")
+  }
 }
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
@@ -9,8 +9,6 @@
 #include <metal_stdlib>
 using namespace metal;

-
-
 inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
  abcd[2] = xyzn[0];
  abcd[1] = xyzn[1];

--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal
@@ -162,51 +162,6 @@ kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)
 }


-kernel void softmax(texture2d_array<float, access::read> inTexture [[texture(0)]],
-                    texture2d_array<float, access::write> outTexture [[texture(1)]],
-                    uint3 gid [[thread_position_in_grid]]) {
-  if (gid.x >= outTexture.get_width() ||
-      gid.y >= outTexture.get_height() ||
-      gid.z >= outTexture.get_array_size()) return;
-  int zsize = inTexture.get_array_size();
-  float maxv = inTexture.read(uint2(0, 0), 0)[0];
-  for (int z = 0; z < zsize; z++) {
-    float4 r = inTexture.read(uint2(0, 0), z);
-    maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3])));
-  }
-  float sum = 0;
-  for (int z = 0; z < zsize; z++) {
-    float4 r = inTexture.read(uint2(0, 0), z);
-    sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv);
-  }
-  float4 rr = inTexture.read(gid.xy, gid.z);
-  rr = exp(rr - maxv) / sum;
-  outTexture.write(rr, gid.xy, gid.z);
-}
-
-
-kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
-                         texture2d_array<half, access::write> outTexture [[texture(1)]],
-                         uint3 gid [[thread_position_in_grid]]) {
-  if (gid.x >= outTexture.get_width() ||
-      gid.y >= outTexture.get_height() ||
-      gid.z >= outTexture.get_array_size()) return;
-  int zsize = inTexture.get_array_size();
-  half maxv = inTexture.read(uint2(0, 0), 0)[0];
-  for (int z = 0; z < zsize; z++) {
-    half4 r = inTexture.read(uint2(0, 0), z);
-    maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3])));
-  }
-  float sum = 0;
-  for (int z = 0; z < zsize; z++) {
-    half4 r = inTexture.read(uint2(0, 0), z);
-    sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv);
-  }
-  half4 rr = inTexture.read(gid.xy, gid.z);
-  rr = exp(rr - maxv) / sum;
-  outTexture.write(rr, gid.xy, gid.z);
-}
-


 struct TransposeParam {

--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+struct SoftmaxParam {  // Bound at buffer(0); layout mirrors SoftmaxMetalParam in SoftmaxKernel.swift.
+  int N;  // Host passes input tensorDim[0]; the softmax kernel in this file does not read it.
+  int K;  // Host passes input tensorDim[1]; number of values reduced along the texture array axis.
+};
+
+// Softmax over the K values stored along the array (z) axis at each (x, y) position.
+//
+// Values are packed four per slice, so a position holds K / 4 full slices plus,
+// when K % 4 != 0, one partial slice whose first K % 4 lanes are valid. Every
+// thread recomputes the maximum and the sum of exp(v - max) for its own (x, y),
+// then normalises the slice selected by gid.z. Lanes beyond K in the last slice
+// are still written, computed from whatever the input texture holds there.
+kernel void softmax(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                    texture2d_array<float, access::write> outTexture [[texture(1)]],
+                    constant SoftmaxParam &sp [[buffer(0)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+  const bool insideGrid = gid.x < outTexture.get_width() &&
+                          gid.y < outTexture.get_height() &&
+                          gid.z < outTexture.get_array_size();
+  if (!insideGrid) return;
+
+  const uint2 texel = gid.xy;
+  const int fullSlices = sp.K / 4;
+  const int tailLanes = sp.K % 4;
+
+  // Pass 1: running maximum, seeded with lane 0 of slice 0.
+  float peak = inTexture.read(texel, 0)[0];
+  for (int slice = 0; slice < fullSlices; ++slice) {
+    const float4 v = inTexture.read(texel, slice);
+    peak = max(peak, max(v[0], max(v[1], max(v[2], v[3]))));
+  }
+  if (tailLanes > 0) {
+    const float4 v = inTexture.read(texel, fullSlices);
+    for (int lane = 0; lane < tailLanes; ++lane) {
+      peak = max(peak, v[lane]);
+    }
+  }
+
+  // Pass 2: sum of exp(v - peak); full slices are accumulated lane-wise first,
+  // then the four lane totals are added left to right.
+  float4 laneSums = float4(0.0f);
+  for (int slice = 0; slice < fullSlices; ++slice) {
+    laneSums += exp(inTexture.read(texel, slice) - peak);
+  }
+  float total = laneSums[0] + laneSums[1] + laneSums[2] + laneSums[3];
+  if (tailLanes > 0) {
+    const float4 v = inTexture.read(texel, fullSlices);
+    for (int lane = 0; lane < tailLanes; ++lane) {
+      total += exp(v[lane] - peak);
+    }
+  }
+
+  // Pass 3: normalise the slice owned by this thread.
+  const float4 current = inTexture.read(texel, gid.z);
+  outTexture.write(exp(current - peak) / total, texel, gid.z);
+}
+//
+//kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+//                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+//                         uint3 gid [[thread_position_in_grid]]) {
+//  if (gid.x >= outTexture.get_width() ||
+//      gid.y >= outTexture.get_height() ||
+//      gid.z >= outTexture.get_array_size()) return;
+//  int zsize = inTexture.get_array_size();
+//  half maxv = inTexture.read(uint2(0, 0), 0)[0];
+//  for (int z = 0; z < zsize; z++) {
+//    half4 r = inTexture.read(uint2(0, 0), z);
+//    maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3])));
+//  }
+//  float sum = 0;
+//  for (int z = 0; z < zsize; z++) {
+//    half4 r = inTexture.read(uint2(0, 0), z);
+//    sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv);
+//  }
+//  half4 rr = inTexture.read(gid.xy, gid.z);
+//  rr = exp(rr - maxv) / sum;
+//  outTexture.write(rr, gid.xy, gid.z);
+//}
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
@@ -23,6 +23,7 @@ class ReshapeParam<P: PrecisionType>: OpParam {
      shape = try ReshapeParam.getAttr(key: "shape", attrs: opDesc.attrs)
        
      var s: [Int] = shape.map { Int($0) }
+      
      var di = -1
      var ml = 1
      for i in 0..<s.count {

--- a/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift
@@ -20,6 +20,13 @@ class SoftmaxParam<P: PrecisionType>: OpParam {
    do {
      input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope)
      output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope)
+      
+      assert(input.tensorDim.dims.count == 2)
+      assert(input.transpose == [0, 1, 2, 3])
+      
+      output.dim = input.dim
+      output.tensorDim = input.tensorDim
+      output.originDim = input.originDim
    } catch let error {
      throw error
    }
@@ -48,7 +55,12 @@ class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>,
  }
  func delogOutput() {
    print("softmax delog")
-    let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
-    let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
+    // Debug dump: read the output texture back via `realNHWC`, using the first four
+    // entries of `originDim` as (n, h, w, c), and print what `strideArray()` returns.
+    let dims = para.output.originDim
+    let values = para.output.metalTexture
+      .realNHWC(dim: (n: dims[0], h: dims[1], w: dims[2], c: dims[3]))
+      .strideArray()
+    print(values)
  }
 }
--- a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
@@ -49,12 +49,14 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
    }
  }
  func delogOutput() {
-    print(para.input.metalTexture.toTensor(dim: (n: para.input.tensorDim[0], c: para.input.tensorDim[1], h: para.input.tensorDim[2], w: para.input.tensorDim[3])).strideArray())
+    print(para.output.metalTexture.realNHWC(dim: (n: para.output.originDim[0], h: para.output.originDim[1], w: para.output.originDim[2], c: para.output.originDim[3])).strideArray())
    
-    
-    let originDim = para.output.tensorDim
-    let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
-    print(outputArray.strideArray())
+//    print(para.input.metalTexture.toTensor(dim: (n: para.input.originDim[0], c: para.input.originDim[1], h: para.input.originDim[2], w: para.input.originDim[3])).strideArray())
+//
+//
+//    let originDim = para.output.tensorDim
+//    let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
+//    print(outputArray.strideArray())
    
    
 //    let inputArray: [Float32] = para.input.metalTexture.floatArray { (ele: Float32) -> Float32 in